forked from avidLearnerInProgress/python-automation-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimdbscraper.py
More file actions
92 lines (82 loc) · 1.96 KB
/
imdbscraper.py
File metadata and controls
92 lines (82 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import urllib.request as req

from bs4 import BeautifulSoup
from tabulate import tabulate
def getResponse(url):
    """Download ``url`` and return its body parsed into a BeautifulSoup tree.

    Args:
        url: Address to fetch; handed unchanged to ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the "lxml" parser.

    Raises:
        Any exception raised by ``urlopen`` or ``read`` (for example
        ``urllib.error.URLError``) propagates to the caller.
    """
    # The context manager closes the HTTP response even when read() fails,
    # so the underlying connection is never leaked.
    with req.urlopen(url) as response:
        data = response.read()
    return BeautifulSoup(data, "lxml")
def selectChoice():
    """Print the chart menu, read the user's pick and return its URL suffix.

    The menu is printed once. The prompt is then repeated until the user
    enters one of the listed numbers; anything else (out-of-range numbers or
    text that is not an integer) prints the "Wrong choice" message and asks
    again.

    Returns:
        The chart path segment for the chosen entry, e.g. ``'top'`` or
        ``'boxoffice'``.

    Raises:
        EOFError: If standard input ends before a valid choice is entered
            (raised by ``input``).
    """
    # menu number -> (label shown to the user, chart path segment)
    options_map = {
        1: ('Top movies', 'top'),
        2: ('Most Popular Movies', 'moviemeter'),
        3: ('Top English Movies', 'top-english-movies'),
        4: ('Top TV Shows', 'toptv'),
        5: ('Most Popular TV Shows', 'tvmeter'),
        6: ('Low Rated Movies', 'bottom'),
        7: ('Top Box Office collection', 'boxoffice'),
    }

    # Print the very keys used for the lookup below, so the numbers shown
    # can never drift from the numbers accepted.
    for number, (label, _slug) in options_map.items():
        print("{}) {}".format(number, label))

    while True:
        answer = input('\nChoice please..\n')
        try:
            choice = int(answer)
        except ValueError:
            # Non-numeric input is just another wrong choice, not a crash.
            choice = None
        if choice in options_map:
            return options_map[choice][1]
        print('Wrong choice, enter again..')
# One leading rank number plus its separator ("12." / "12 - " / "12 ").
# Only the first number is consumed, so titles that themselves start with
# digits keep them once the rank has been removed.
_RANK_PREFIX = re.compile(r"^\s*\d+[.\-\s]+")


def _cardText(card, tag, default="N/A"):
    """Return the text of the first ``tag`` element in ``card``, or ``default`` if there is none."""
    node = card.find(tag)
    return default if node is None else node.text


def getData(base_url, option):
    """Fetch one chart page and print its entries as a grid table.

    Args:
        base_url: Chart URL prefix; ``option`` is appended to it verbatim.
        option: Chart path segment, as returned by ``selectChoice``.

    Returns:
        None. The table (columns Index, Name, Ratings) is written to stdout.

    Each ``span`` with class ``media-body media-vertical-align`` is treated as
    one entry: its ``h4`` text supplies the name and its ``p`` text the rating.
    An entry lacking either element shows ``N/A`` in that column instead of
    raising or repeating the previous entry's value.
    """
    soup = getResponse(base_url + option)
    card_list = soup.find_all('span', {'class': 'media-body media-vertical-align'})  # material card list

    result = []
    for count, card in enumerate(card_list, 1):
        heading = _cardText(card, 'h4').replace("\n", " ")
        # Drop the order index in front of the title (1, 2, 3, ...).
        # NOTE(review): assumes the heading text starts with that index —
        # confirm against the live markup.
        name = _RANK_PREFIX.sub("", heading, count=1)
        rating = _cardText(card, 'p').strip()
        result.append([count, name, rating])

    print(tabulate(result, headers=["Index", "Name", "Ratings"], tablefmt="grid"))
def main():
    """Ask the user which chart to show, then fetch and print that chart.

    The chart path segment chosen in ``selectChoice`` is appended to the
    mobile IMDb chart URL prefix inside ``getData``.
    """
    getData("http://m.imdb.com/chart/", selectChoice())


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
'''
#table formats accepted by tabulate's tablefmt argument (getData uses "grid")
- "plain"
- "simple"
- "grid"
- "fancy_grid"
- "pipe"
- "orgtbl"
- "jira"
- "presto"
- "psql"
- "rst"
- "mediawiki"
- "moinmoin"
- "youtrack"
- "html"
- "latex"
- "latex_raw"
- "latex_booktabs"
- "textile"
'''