-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathlo_assem_to_ref.py
More file actions
84 lines (73 loc) · 2.77 KB
/
lo_assem_to_ref.py
File metadata and controls
84 lines (73 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# %%
#generate bed file that contains positions on ref to be lifted to contigs
import csv
import math
#pysam: https://pysam.readthedocs.io/en/latest/usage.html
import pysam
#print(pysam.__version__)
import sys
#get command line input
#n = len(sys.argv)
output_dir = sys.argv[1] + "/"
assembly_bam_file_hap1 = sys.argv[2]
assembly_bam_file_hap2 = sys.argv[3]
#print("\nName of Python script:", sys.argv[0])
interval = 20
#check if k-th bit of a given number is set or not
def isKthBitSet(n, k):
if n & (1 << (k - 1)):
return True
else:
return False
#for assem1
#generate bed file liftover
#TODO: change output
g = open(output_dir + "lo_pos_assem1.bed", "w")
samfile = pysam.AlignmentFile(assembly_bam_file_hap1, "rb")
for record in samfile:
#TODO: solve the unmapped segment problem???
if not isKthBitSet(record.flag, 3):
#samLiftover's bed file needs ref name for both directions
ref_name = record.reference_name
query_name = record.query_name
#TODO: check not matching with paf
query_start = record.query_alignment_start
query_end = record.query_alignment_end
ref_start = record.reference_start
ref_end = record.reference_end
query_start = math.ceil(query_start/interval) * interval
query_end = math.floor(query_end/interval) * interval
ref_start = math.ceil(ref_start/interval) * interval
ref_end = math.floor(ref_end/interval) * interval
for i in range(ref_start, ref_end + interval, interval):
g.write(ref_name + "\t")
g.write(str(i) + "\t")
g.write(str(i+1) + "\t")
g.write(query_name + "\t")
g.write("\n")
g.close()
#for assem2
g = open(output_dir + "lo_pos_assem2.bed", "w")
samfile = pysam.AlignmentFile(assembly_bam_file_hap2, "rb")
for record in samfile:
#TODO: solve the unmapped segment problem???
if not isKthBitSet(record.flag, 3):
#samLiftover's bed file needs ref name for both directions
ref_name = record.reference_name
query_name = record.query_name
#TODO: check not matching with paf
query_start = record.query_alignment_start
query_end = record.query_alignment_end
ref_start = record.reference_start
ref_end = record.reference_end
query_start = math.ceil(query_start/interval) * interval
query_end = math.floor(query_end/interval) * interval
ref_start = math.ceil(ref_start/interval) * interval
ref_end = math.floor(ref_end/interval) * interval
for i in range(ref_start, ref_end + interval, interval):
g.write(ref_name + "\t")
g.write(str(i) + "\t")
g.write(str(i+1) + "\t")
g.write(query_name + "\t")
g.write("\n")
g.close()