-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathESC_convert_metadata.py
More file actions
58 lines (53 loc) · 2.23 KB
/
ESC_convert_metadata.py
File metadata and controls
58 lines (53 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from io import StringIO
import json
import pandas as pd
import sys
from tqdm import tqdm
# Convert the ESC genome metadata CSV into a JSON document keyed by sample
# accession, and write two accession lists next to it:
#   ESC_ENA_Accessions.txt     - accessions without "ESC_" or "," in them
#   ESC_UNKNOWN_Accessions.txt - accessions that still contain "ESC_" or ","
sys.stderr.write("\nExtracting metadata from ESC csv file\n")
metadata = pd.read_csv("F1_genome_metadata.csv")

# Build an ID -> accession lookup from the tab-separated file. The first line
# is skipped as a header; column 0 holds the ID and column 15 the accession.
# The file is read as bytes and each line decoded as windows-1252.
with open("FILTERED_MD_FINAL_ALL.tab", "rb") as accessFile:
    additions = accessFile.read().splitlines()[1:]
accession_by_id = {}
for line in additions:
    fields = line.decode("windows-1252").split("\t")
    # setdefault keeps the first occurrence of a duplicated ID, which matches
    # the first-match semantics of the list.index() scan this dict replaces.
    accession_by_id.setdefault(fields[0].upper(), fields[15])

# Columns copied verbatim from each metadata row into the JSON record.
METADATA_COLUMNS = [
    "Assembly_name",
    "PopPUNK",
    "ST",
    "MDR",
    "Ab_classes",
    "Pathotype",
    "Phylogroup",
    "Isolation",
    "Country",
    "Continent",
    "Total_AMR_genes",
    "Total_virulence_genes",
    "Pathotype_Vir_genes",
    "Other_Vir_genes",
    "AMR_genes",
]

ERS_dict = {}
ENA_accessions = []
unknown_accessions = []
for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
    sample_id = row["ID"]
    # pandas loads empty CSV cells as NaN by default, so a plain == "" test
    # never catches a missing ID and the substring test below would raise
    # TypeError on the float. Skip both representations of "no ID".
    if pd.isna(sample_id) or sample_id == "":
        continue
    if "ESC_" in sample_id:
        # Translate internal ESC_ identifiers where a mapping exists; fall
        # back to the raw ID otherwise.
        # NOTE(review): lookup keys are upper-cased but the metadata ID is
        # used as-is, so a lower-case ID would never match - confirm intended.
        accession = accession_by_id.get(sample_id, sample_id)
    else:
        accession = sample_id
    # A repeated accession overwrites the earlier record in ERS_dict.
    ERS_dict[accession] = {column: row[column] for column in METADATA_COLUMNS}
    # Anything still carrying an ESC_ prefix, or a comma, is reported
    # separately as an unknown accession.
    if "ESC_" in accession or "," in accession:
        unknown_accessions.append(accession)
    else:
        ENA_accessions.append(accession)

with open("ESC_METADATA.json", "w") as m:
    m.write(json.dumps(ERS_dict))
with open("ESC_ENA_Accessions.txt", "w") as e:
    e.write("\n".join(ENA_accessions))
with open("ESC_UNKNOWN_Accessions.txt", "w") as e:
    e.write("\n".join(unknown_accessions))
sys.stderr.write("Done\n")