-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
116 lines (88 loc) · 3.34 KB
/
main.py
File metadata and controls
116 lines (88 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from bs4 import BeautifulSoup
import urllib.request
import re
from gensim.summarization import summarize
from flask import Flask, redirect, render_template, request, session, url_for
import LinkedLists as ll
# The WSGI application object; HTML templates are looked up in ./templates.
app = Flask(
    __name__,
    template_folder='templates',
)
class chunk:
    """One segment of a summarized transcript.

    Pairs a timestamp label with the content belonging to it. Elsewhere in
    this file the label is the HTML link built by ``toLink`` and the content
    is the paragraph built by ``toPara``, but any values are accepted.
    """

    def __init__(self, ts, cont):
        """Store the timestamp label ``ts`` and the segment body ``cont``."""
        self.timestamp = ts
        self.content = cont

    def getTimestamp(self):
        """Return the timestamp label given at construction."""
        return self.timestamp

    def getContent(self):
        """Return the segment body given at construction."""
        return self.content

    def __str__(self):
        """Return ``"<timestamp> : <content>"``.

        ``__str__`` must *return* the text; printing it and returning None
        makes ``str(obj)`` raise TypeError. ``str()`` is applied to both
        parts so non-string timestamps (e.g. floats) are accepted too.
        """
        return str(self.timestamp) + " : " + str(self.content)
@app.route('/')
def index():
    """Render the landing page using a hard-coded YouTube video.

    The transcript summary is produced by ``getSoup`` for the id extracted
    from the fixed watch URL; the embed link points at the same video.
    """
    default_id = getVideoId('https://www.youtube.com/watch?v=M7lc1UVf-VE')
    summary_html = getSoup(default_id)
    return render_template(
        "index.html",
        transcript=summary_html,
        embed_link="https://www.youtube.com/embed/M7lc1UVf-VE?enablejsapi=1",
    )
@app.route('/summarize', methods=["POST"])
def getSummary():
    """Render the summary page for the link posted in the ``link`` form field.

    Returns the rendered ``index.html`` template. When the link is missing,
    is not a recognised YouTube URL, or anything fails while extracting the
    id / fetching / summarizing, an apology snippet and a placeholder embed
    are rendered instead of letting the request fail.
    """
    error_html = '<p style="text-align: center;">Uh Oh! Looks like that input isn\'t working!</p>'

    # .get() so that a missing field yields the friendly error page rather
    # than an automatic 400 response.
    video_link = request.form.get('link')

    # Explicit grouping: the original `a and b or c` evaluated the second
    # membership test even when the link was None.
    looks_like_youtube = bool(video_link) and (
        'youtu.be/' in video_link or 'youtube.com/watch?v=' in video_link
    )
    if not looks_like_youtube:
        # NOTE(review): this giphy id differs by one character from the one
        # used in the failure branch below ("...sfQs" vs "...sfQsg"); one of
        # them is probably a typo - confirm which embed is intended.
        return render_template("index.html", transcript=error_html, embed_link="//giphy.com/embed/uPTp0I1c3sfQs")

    try:
        # Id extraction happens inside the try block so that a link the
        # parser cannot handle produces the error page, not a 500.
        video_id = getVideoId(video_link)
        return render_template(
            "index.html",
            transcript=getSoup(video_id),
            embed_link="https://www.youtube.com/embed/" + video_id + "?enablejsapi=1",
        )
    except Exception:
        # Route-level boundary: record the failure instead of hiding it.
        app.logger.exception("Could not summarize %r", video_link)
        return render_template("index.html", transcript=error_html, embed_link="//giphy.com/embed/uPTp0I1c3sfQsg")
def getVideoId(video_link):
    """Extract the video id from a YouTube URL.

    Handles both ``youtube.com/watch?...v=<id>`` and ``youtu.be/<id>``
    links. Anything following the id (``&t=30``, ``?t=30``, ``#frag``) is
    excluded from the result.

    Args:
        video_link: URL string, or None.

    Returns:
        The id as a string, or None when ``video_link`` is empty/None or
        does not contain a recognisable id.
    """
    if not video_link:
        return None
    # Short links carry the id in the path; watch links carry it in the
    # ``v`` query parameter, which is not necessarily the first parameter.
    found = re.search(
        r'(?:youtu\.be/|youtube\.com/watch\?(?:[^#\s]*?&)?v=)([^&?#/\s]+)',
        video_link,
    )
    if found is None:
        return None
    return found.group(1)
def toPara(passage):
    """Wrap ``passage`` in an HTML ``<p>`` element and return the markup."""
    return ''.join(('<p>', passage, '</p>'))
def toLink(seconds, video_id):
    """Return an HTML anchor that opens the video at a given offset.

    Args:
        seconds: Offset into the video; truncated to whole seconds.
        video_id: YouTube video id string.

    Returns:
        An ``<a>`` element (opening in a new tab) whose href is the watch
        URL with a ``t=<seconds>`` parameter.
    """
    link = 'https://www.youtube.com/watch?v=' + video_id + '&feature=youtu.be&t=' + str(int(seconds))
    # The original emitted `target="_blank"">` - a stray quote that made the
    # tag malformed; the attribute is now closed exactly once.
    return '<a href="' + link + '" target="_blank">Go to this part in the video</a>'
def getSoup(_video_id):
    """Download the English timed-text captions for a video and summarize them.

    Args:
        _video_id: YouTube video id string.

    Returns:
        Whatever ``summ_it`` returns for the caption ``<text>`` elements,
        using a 60-second chunk length.

    Raises:
        ValueError: if ``_video_id`` is empty or None.
        urllib.error.URLError: if the caption request fails.
    """
    from urllib.parse import urlencode

    if not _video_id:
        raise ValueError("a video id is required to fetch captions")

    # Encode the id instead of concatenating it raw: it originates from
    # user-supplied links and must not be able to alter the query string.
    query = urlencode([('lang', 'en'), ('v', _video_id)])
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen('http://video.google.com/timedtext?' + query) as response:
        page = response.read()

    soup = BeautifulSoup(page, 'xml')
    text_elements = soup.find_all('text')
    return summ_it(text_elements, 60, _video_id)
def summ_it(elems, lapse, video_id):
    """Summarize caption elements in time-based segments and return HTML.

    Caption text is accumulated until an element starts later than the
    current segment boundary; the accumulated text is then condensed and
    stored together with a link to the segment's start time. Whatever text
    remains after the last element is flushed as a final segment.

    Args:
        elems: Iterable of caption elements; each must support
            ``element['start']`` (convertible to float) and ``element.text``.
        lapse: Segment length, in the same unit as ``start``.
        video_id: Video id used to build the per-segment links.

    Returns:
        A string made of, for every stored segment, its link, its paragraph
        and a ``<br>``, in the order the linked list pops them.
    """
    next_time = lapse + 1
    data = ll.LinkedList()
    pending = []          # caption lines collected for the current segment
    last_time = 0         # start time of the segment being collected

    for element in elems:
        current_time = float(element['start'])
        if not (current_time <= next_time):
            # Boundary crossed: close the current segment and open a new
            # one beginning at this element.
            next_time = current_time + float(lapse)
            summary = _condense_segment(''.join(pending))
            data.add(chunk(toLink(last_time, video_id), toPara(summary)))
            last_time = current_time
            pending = []
        pending.append(element.text.replace('\n', ' ') + ' ')

    # Flush the text gathered after the last boundary.
    summary = _condense_segment(''.join(pending))
    data.add(chunk(toLink(last_time, video_id), toPara(summary)))

    rendered = []
    while data.hasNext():
        item = data.pop()
        rendered.append(str(item.getTimestamp()) + item.getContent() + '<br>')
    return ''.join(rendered)


def _condense_segment(text):
    """Summarize one segment's text, falling back to its trailing sentence.

    The same fallback is applied to every segment; previously only the final
    segment had one, so a summarizer failure mid-transcript aborted the
    whole request.
    """
    text = re.sub(r'\b\.,\b', ',', text)
    try:
        summary = summarize(text)
    except Exception:
        # Summarizer rejected the text: keep what follows the second-to-last
        # period. With fewer than two periods keep everything (the original
        # slice silently dropped the first character in that case).
        cut = text.rfind('.', 0, text.rfind('.'))
        return text[cut + 2:] if cut != -1 else text
    # NOTE(review): the scraped source shows this pattern as r'\b'\b', which
    # is not valid Python; it is restored here as the HTML apostrophe entity
    # on the assumption that the page scrape decoded it - confirm against
    # the repository.
    return re.sub(r'\b&#39;\b', '\'', summary)
def main():
    """Extract the id from a hard-coded sample URL; the result is discarded."""
    # NOTE(review): nothing here starts the Flask server - presumably the app
    # is launched by other means; confirm whether app.run() was intended.
    getVideoId('https://www.youtube.com/watch?v=Qymp_VaFo9M')


if __name__ == '__main__':
    main()