forked from azk0019/CourseProject
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.py
More file actions
26 lines (19 loc) · 716 Bytes
/
parse.py
File metadata and controls
26 lines (19 loc) · 716 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import glob
import os
# Returns a 2D list with fields: videoname, offset, timestamp, and document.
def getDocs():
    """Parse every ``data/*.srt`` file into a flat list of subtitle records.

    Each file is split into blocks on blank lines. Within a block the first
    line is taken as the offset, the second as the timestamp, and all
    remaining lines are joined with single spaces to form the document.
    Blocks that carry no document text are skipped.

    The video name is derived from the file name: the text after the first
    underscore, up to the first dot. If the file name contains no
    underscore, the text before the first dot is used instead.

    Returns:
        A list of ``[videoname, offset, timestamp, document]`` lists (all
        strings), ordered by file path and then by position in the file.
        Empty when no matching files exist.
    """
    docs = []
    # glob yields paths in arbitrary order; sort so record order is stable.
    for fname in sorted(glob.glob("data/*.srt")):
        # basename() copes with whichever path separator the OS uses.
        name_parts = os.path.basename(fname).split('_')
        name_part = name_parts[1] if len(name_parts) > 1 else name_parts[0]
        videoname = name_part.split('.')[0]

        # utf-8-sig decodes plain UTF-8 and drops a leading BOM, which would
        # otherwise end up glued to the first offset field.
        with open(fname, encoding="utf-8-sig") as data:
            blocks = data.read().split('\n\n')

        for block in blocks:
            # Strip first: a trailing newline at end of file or an extra
            # blank line between blocks would otherwise add an empty field
            # and shift or pad the record.
            fields = block.strip().split('\n')
            document = ' '.join(fields[2:])
            if document:
                docs.append([videoname, fields[0], fields[1], document])
    return docs
# Build the record list once at import time, then print a quick sanity
# check: the first record, records 137-139, and the total record count,
# one per line.
docs = getDocs()
print(docs[0], docs[137:140], len(docs), sep='\n')