-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_rest_api.py
More file actions
124 lines (98 loc) · 4.67 KB
/
example_rest_api.py
File metadata and controls
124 lines (98 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Example usage of the DigestedProteinDB REST API
https://digestedproteindb.pbf.hr/rest.php
"""
import requests
# REST endpoints: plain search, and search with a taxonomy ID per accession.
BASE_URL = "https://digestedproteindb.pbf.hr/search.php"
TAXONOMY_URL = "https://digestedproteindb.pbf.hr/search-taxonomy.php"

# Monoisotopic residue masses in daltons, keyed by one-letter amino acid code.
AA_MASS = {
    'A': 71.03711,   # alanine
    'R': 156.10111,  # arginine
    'N': 114.04293,  # asparagine
    'D': 115.02694,  # aspartic acid
    'C': 103.00919,  # cysteine
    'E': 129.04259,  # glutamic acid
    'Q': 128.05858,  # glutamine
    'G': 57.02146,   # glycine
    'H': 137.05891,  # histidine
    'I': 113.08406,  # isoleucine (same mass as leucine)
    'L': 113.08406,  # leucine
    'K': 128.09496,  # lysine
    'M': 131.04049,  # methionine
    'F': 147.06841,  # phenylalanine
    'P': 97.05276,   # proline
    'S': 87.03203,   # serine
    'T': 101.04768,  # threonine
    'W': 186.07931,  # tryptophan
    'Y': 163.06333,  # tyrosine
    'V': 99.06841,   # valine
}

# Mass of one H2O, added once per peptide to account for the chain termini.
WATER = 18.01056
def peptide_mass(sequence: str) -> float:
    """Return the monoisotopic mass of a peptide given as one-letter codes.

    The sequence is upper-cased, each residue mass is looked up in
    ``AA_MASS`` and summed, and ``WATER`` is added once at the end.

    Raises:
        KeyError: if the sequence contains a character that is not a key
            of ``AA_MASS``.
    """
    residue_total = 0
    for residue in sequence.upper():
        residue_total += AA_MASS[residue]
    return residue_total + WATER
def search_by_mass(
    mass1: float,
    mass2: float,
    page: int = 1,
    page_size: int = 10,
    *,
    timeout: float = 30.0,
) -> dict:
    """Search peptides by mass range. Returns plain accession numbers.

    Issues a GET request to ``BASE_URL`` with the query parameters
    ``mass1``, ``mass2``, ``page`` and ``pageSize``.

    Args:
        mass1: First bound of the mass range, sent as ``mass1``.
        mass2: Second bound of the mass range, sent as ``mass2``.
        page: Page number to request.
        page_size: Number of results per page, sent as ``pageSize``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    params = {"mass1": mass1, "mass2": mass2, "page": page, "pageSize": page_size}
    response = requests.get(BASE_URL, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
def search_by_peptide(sequence: str, tolerance_da: float = 0.02, page_size: int = 10) -> dict:
    """Look up a peptide by sequence via its locally computed mass.

    The mass is computed with ``peptide_mass``, reported on stdout, and then
    the window ``[mass - tolerance_da, mass + tolerance_da]`` is passed to
    ``search_by_mass`` together with ``page_size``.
    """
    center = peptide_mass(sequence)
    print(f" Calculated mass for '{sequence}': {center:.5f} Da (±{tolerance_da} Da)")
    lower_bound = center - tolerance_da
    upper_bound = center + tolerance_da
    return search_by_mass(lower_bound, upper_bound, page_size=page_size)
def search_by_mass_taxonomy(
    mass1: float,
    mass2: float,
    page: int = 1,
    page_size: int = 10,
    *,
    timeout: float = 30.0,
) -> dict:
    """Search peptides by mass range with NCBI taxonomy ID per accession.

    Issues a GET request to ``TAXONOMY_URL`` with the query parameters
    ``mass1``, ``mass2``, ``page`` and ``pageSize``.

    Args:
        mass1: First bound of the mass range, sent as ``mass1``.
        mass2: Second bound of the mass range, sent as ``mass2``.
        page: Page number to request.
        page_size: Number of results per page, sent as ``pageSize``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    params = {"mass1": mass1, "mass2": mass2, "page": page, "pageSize": page_size}
    response = requests.get(TAXONOMY_URL, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
def print_results(data: dict) -> None:
    """Print search results (plain accession format).

    Writes a four-line summary (``totalResult``, ``page``/``pageSize``,
    ``duration``, ``memory``), a 65-character rule, and then one line per
    peptide found under ``data["result"]``.

    Each element of ``result`` is read as a mapping of mass -> list of
    peptide dicts, and each peptide dict is read for ``seq`` and ``acc``.
    Missing or null ``result``, peptide lists, ``seq`` and ``acc`` values
    are tolerated instead of raising.

    Args:
        data: Decoded JSON response from the search endpoint.
    """
    print(f"Total results : {data.get('totalResult', 0)}")
    print(f"Page : {data.get('page')} / pageSize={data.get('pageSize')}")
    print(f"Duration : {data.get('duration')}")
    print(f"Memory : {data.get('memory')}")
    print("-" * 65)
    # `.get(key, [])` still yields None when the key is present with a JSON
    # null, so fall back with `or []` to keep the loops safe.
    for item in data.get("result") or []:
        for mass, peptides in item.items():
            for pep in peptides or []:
                seq = pep.get("seq")
                if seq is None:
                    # None does not support the `:<20` format spec.
                    seq = "?"
                # str() keeps join() working if accessions are not strings.
                accs = ", ".join(str(acc) for acc in pep.get("acc") or [])
                print(f" Mass: {mass:<12} Sequence: {seq:<20} Acc: {accs}")
def print_results_taxonomy(data: dict) -> None:
    """Print search results including NCBI taxonomy IDs.

    Writes a four-line summary (``totalResult``, ``page``/``pageSize``,
    ``duration``, ``memory``), a 75-character rule, and then one line per
    peptide found under ``data["result"]``.

    Each element of ``result`` is read as a mapping of mass -> list of
    peptide dicts, and each peptide dict is read for ``seq`` and
    ``accsTax``. Missing or null values are tolerated: an absent ``acc`` or
    ``taxId`` inside an ``accsTax`` entry is shown as ``?`` rather than
    raising ``KeyError``, matching the defaults used for the other fields.

    Args:
        data: Decoded JSON response from the taxonomy search endpoint.
    """
    print(f"Total results : {data.get('totalResult', 0)}")
    print(f"Page : {data.get('page')} / pageSize={data.get('pageSize')}")
    print(f"Duration : {data.get('duration')}")
    print(f"Memory : {data.get('memory')}")
    print("-" * 75)
    # `.get(key, [])` still yields None when the key is present with a JSON
    # null, so fall back with `or []` to keep the loops safe.
    for item in data.get("result") or []:
        for mass, peptides in item.items():
            for pep in peptides or []:
                seq = pep.get("seq")
                if seq is None:
                    # None does not support the `:<20` format spec.
                    seq = "?"
                # accsTax: list of {"acc": "...", "taxId": 12345}
                accs_tax = pep.get("accsTax") or []
                accs_str = ", ".join(
                    f"{entry.get('acc', '?')} (taxId:{entry.get('taxId', '?')})"
                    for entry in accs_tax
                )
                print(f" Mass: {mass:<12} Sequence: {seq:<20} {accs_str}")
if __name__ == "__main__":
    # Separator drawn above and below every example title.
    rule = "=" * 65

    # --- Example 1: search by mass range ---
    print(rule, "Example 1: search by mass range (1247.50 – 1247.52)", rule, sep="\n")
    print_results(search_by_mass(mass1=1247.50, mass2=1247.52, page_size=5))
    print()

    # --- Example 2: search by peptide sequence ---
    peptide = "SYTFHFKYR"
    print(rule, f"Example 2: search by peptide sequence '{peptide}'", rule, sep="\n")
    print_results(search_by_peptide(peptide, tolerance_da=0.02, page_size=5))
    print()

    # --- Example 3: pagination – fetch page 2 ---
    print(rule, "Example 3: pagination – page 2, mass range 1800.00 – 1800.02", rule, sep="\n")
    print_results(search_by_mass(mass1=1800.00, mass2=1800.02, page=2, page_size=5))
    print()

    # --- Example 4: search with taxonomy (NCBI taxId per accession) ---
    print(rule, "Example 4: search with taxonomy (1247.60 – 1247.62)", rule, sep="\n")
    # Best effort: any failure while fetching or printing is reported on
    # stdout instead of aborting the script.
    try:
        taxonomy_data = search_by_mass_taxonomy(mass1=1247.60, mass2=1247.62, page_size=5)
        print_results_taxonomy(taxonomy_data)
    except Exception as exc:
        print(f" Taxonomy endpoint not available: {exc}")