Skip to content

Commit 91c5794

Browse files
BeliApp scraper added
1 parent 8c90391 commit 91c5794

File tree

9 files changed

+324
-4
lines changed

9 files changed

+324
-4
lines changed

Advanced/decorators/3_maths.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
1-
1+
"""
2+
Calling excution :
3+
1 - calling sqaure
4+
2 - calling add
5+
3 - Adding value : 6, 9
6+
4 - Result 15
7+
5 -Squaring : 15
8+
6 - Result 225
9+
7- Value returned = 225
10+
"""
211

312
def add(wrapped_func):
4-
13+
print('calling add')
514
def inner(*args, **kwargs):
615
c = args[0] + args[1]
716
print('Adding value : {}, {}'.format(args[0], args[1]))
@@ -10,7 +19,7 @@ def inner(*args, **kwargs):
1019
return inner
1120

1221
def sqaure(wrapped_func):
13-
22+
print('calling sqaure')
1423
def inner(*args, **kwargs):
1524
c = args[0] ** 2
1625
print('Squaring : {}'.format(args[0]))

Advanced/decorators/4_memo.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import time
22

3-
cached = {}
3+
44

55
def memo(wrapped_func):
6+
cached = {}
67
def inner(n1, n2):
8+
nonlocal cached
79
params = '{}{}'.format(n1, n2)
810
if params not in cached:
911
value = wrapped_func(n1, n2)

Advanced/decorators/5_params.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
def func1(func):
    """Decorator: collapse all positional args into their sum before calling func."""
    def inner(*args, **kwd):
        total = sum(args)
        return func(total, **kwd)
    return inner


def func2(func):
    """Decorator: add kwd['c']['c_key'] to the first positional argument,
    then call func with that total and an emptied keyword dict (passed
    positionally, exactly as the wrapped call site expects)."""
    def inner(*args, **kwd):
        combined = args[0] + kwd['c']['c_key']
        return func(combined, {})
    return inner


@func1
@func2
def something(*args, **kwd):
    """Return the first positional argument received after decoration."""
    # print(args, kwd)
    return args[0]


ans = something(1, 2, c={'c_key': 3})
print(ans)

Advanced/recursion/1_dict.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
def get_dict_diff(old, new):
    """Recursively diff two dicts.

    Returns a dict holding every key from *new* whose value is absent from
    *old* or differs from *old*'s value.  When both sides hold dicts for the
    same key, they are compared recursively and the nested differences come
    back as a nested dict.  Non-dict inputs yield an empty diff.
    """
    def compare(old, new):
        dict_diff = {}
        # Both sides must be dicts to compare.  The original check dropped
        # the second `not`, so it returned {} for every valid pair of dicts.
        if not isinstance(old, dict) or not isinstance(new, dict):
            return dict_diff

        for key in new:
            if key in old:
                new_val = new[key]
                old_val = old[key]
                if isinstance(new_val, dict) and isinstance(old_val, dict):
                    # Recurse directly on the nested values and KEEP the
                    # result (the original discarded the recursive call).
                    nested = compare(old_val, new_val)
                    if nested:
                        dict_diff[key] = nested
                elif old_val != new_val:
                    dict_diff[key] = new_val
            else:
                # Key exists only in `new`: report it as an addition.
                dict_diff[key] = new[key]
        return dict_diff

    diff = compare(old, new)
    print(diff)  # kept for parity with the original debug output
    return diff


a = {
    'a': 1,
    'b': {
        'old_1': 2,
        'old_2': 3
    },
    'c': 4
}

b = {
    'a': 1,
    'b': {
        'old_1': 3,
        'old_2': 4
    },
    'c': 5
}

ans = get_dict_diff(old=a, new=b)
# print(ans)

Selenium/BeliApp/Task.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
https://app.beliapp.com/lists/northeatsbias
2+
3+
We need to extract the following pieces of information from each list:
4+
- Name of Restaurant
5+
- Dollar Sign
6+
- Categories
7+
- Neighborhood
8+
- City
9+
- Rating
10+
11+
For the first entry on the list above, the information would be the following:
12+
- Name of Restaurant: 4 Charles Prime Rib
13+
- Dollar Sign: $$$
14+
- Categories: American, Burgers, Steakhouse
15+
- Neighborhood: West Village
16+
- City: New York
17+
- Rating: 10.0
18+
19+
Turnaround speed is really important! Please only apply if you can get this done within a few hours. Thank you!
20+

Selenium/BeliApp/beli_app.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
from selenium import webdriver
3+
from selenium.webdriver.chrome.service import Service as ChromeService
4+
from webdriver_manager.chrome import ChromeDriverManager
5+
from selenium.webdriver.common.keys import Keys
6+
from selenium.webdriver.common.by import By
7+
from selenium.webdriver.support import expected_conditions as EC
8+
from selenium.webdriver.support.ui import WebDriverWait
9+
10+
import time
11+
import random
12+
import pandas as pd
13+
from bs4 import BeautifulSoup
14+
from datetime import timedelta, datetime
15+
16+
class BeliApp:
    """Scrape the Beli restaurant list page and export the rows to CSV.

    Intended workflow: load_driver() -> scoll_end() -> extract() -> close().
    """

    URL = "https://app.beliapp.com/lists/northeatsbias"
    RUN_TIME = timedelta(minutes=1)  # how long scoll_end() keeps scrolling

    def __init__(self, save_file) -> None:
        # Output CSV path; falls back to 'beli_file.csv' when empty/falsy,
        # which preserves the previous hard-coded behavior.
        self.save_file = save_file

    def load_driver(self):
        """Start a maximized Chrome session and open the list page."""
        options = webdriver.ChromeOptions()
        options.add_argument("--start-maximized")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=options,
        )
        self.driver.get(self.URL)

    def scoll_end(self):
        """Page-down through the list for RUN_TIME so lazy rows render."""
        start_time = datetime.now()
        time.sleep(5)  # let the initial page load settle
        body = self.driver.find_element(By.TAG_NAME, 'body')
        body.click()  # focus the page so PAGE_DOWN keystrokes register
        while True:
            time.sleep(random.randint(1, 3))  # human-like pacing
            body.send_keys(Keys.PAGE_DOWN)
            elapsed = datetime.now() - start_time
            if elapsed.seconds >= self.RUN_TIME.seconds:
                break

    def extract(self):
        """Parse the rendered DOM and write one CSV row per restaurant."""
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        restaurant_list = []
        rows = soup.find_all('ion-grid')
        for row in rows:
            # Every column pre-seeded (including 'categories', which the
            # original set only in some branches) so the CSV header is
            # stable regardless of which branch runs first.
            mapped = {
                'restaurant_name': '',
                'dollar_sign': '',
                'neighborhood': '',
                'city': '',
                'rating': '',
                'categories': '',
            }
            paras = row.findAll('p')
            # The first <p> starts with a rank prefix (e.g. "1.   ");
            # the first 5 characters are dropped to remove it.
            mapped['restaurant_name'] = paras[0].get_text()[5:].strip()
            categories = paras[1].get_text()
            if '|' in categories:
                # "price | categories" form.
                categories = categories.split('|')
                mapped['dollar_sign'] = categories[0].strip()
                mapped['categories'] = categories[1]
            elif '$' in categories:
                mapped['dollar_sign'] = categories.strip()
            else:
                mapped['categories'] = categories

            # Third <p> is "neighborhood, city" (city may be absent).
            address = paras[2].get_text().split(',')
            mapped['neighborhood'] = address[0]
            if len(address) > 1:
                mapped['city'] = address[1]

            mapped['rating'] = row.strong.get_text()
            restaurant_list.append(mapped)
            # print(mapped, end= '\n\n')

        df = pd.DataFrame(restaurant_list)
        # Honour the configured path (the original ignored save_file).
        df.to_csv(self.save_file or 'beli_file.csv', encoding='utf-8',
                  index=False, doublequote=True)

    def close(self):
        """Shut the browser down completely."""
        # quit() (unlike close()) ends the whole session and terminates
        # the chromedriver service process.
        self.driver.quit()
82+
83+
if __name__ == '__main__':
    # Full pipeline: launch the browser, scroll the lazy list into view,
    # scrape the rendered rows, then shut the driver down.
    scraper = BeliApp(save_file='')
    scraper.load_driver()
    scraper.scoll_end()
    scraper.extract()
    scraper.close()

Selenium/BeliApp/beli_file.csv

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
restaurant_name,dollar_sign,neighborhood,city,rating,categories
2+
4 Charles Prime Rib,$$$, West Village, New York ,10.0," American, Burgers, Steakhouse"
3+
SAGA,$$$$, Financial District, New York ,10.0, Global
4+
Le Bernardin,, Theater District, New York ,10.0,French
5+
Peter Luger Steak House,$$$$, Southside, New York ,10.0," American, Steakhouse"
6+
Don Angie,$$$, West Village, New York ,10.0, Italian
7+
The Mad Hatter Restaurant,$$$, Sanibel ,,10.0,
8+
Vetri Cucina,$$$$, Washington Square West, Philadelphia ,10.0," American, Italian"
9+
Bern's Steak House,$$$$, Historic Hyde Park North, Tampa ,10.0," American, Dessert, Steakhouse"
10+
Misi,$$$, Williamsburg, New York ,10.0, Italian
11+
The Daily Catch Restaurant,$$, North End, Boston ,10.0," Italian, Seafood"
12+
I Sodi,$$$, West Village, New York ,10.0, Italian
13+
L'Artusi,$$$, West Village, New York ,10.0, Italian
14+
Marea,$$$$, Midtown West, New York ,10.0," Italian, Seafood"
15+
Oxomoco,$$$, Greenpoint, New York ,9.9, Mexican
16+
The Four Horsemen,$$$, Williamsburg, New York ,9.9, American
17+
Vernick Food & Drink,$$$, Center City West, Philadelphia ,9.9," American, French"
18+
Barclay Prime,$$$$, Rittenhouse Square, Philadelphia ,9.9, Steakhouse
19+
Raoul's,$$$, South Village, New York ,9.9," Bistro, French"
20+
Au Cheval,$$$, Lower Manhattan, New York ,9.9," American, Diner"
21+
Lilia,$$$, Williamsburg, New York ,9.9, Italian
22+
Via Carota,$$$, West Village, New York ,9.9, Italian
23+
Fiorella Pasta,$, Bella Vista, Philadelphia ,9.9, Italian
24+
Pelican & Pig,, Greenwood, Nashville ,9.9,American
25+
Laser Wolf,$$, Olde Kensington, Philadelphia ,9.9," Israeli, Middle Eastern"
26+
Chubby Fish,, Cannonborough Elliotborough, Charleston ,9.9,Seafood
27+
Angelo's Pizzeria,, Bella Vista, Philadelphia ,9.9,Pizza
28+
Joe's Stone Crab,$$$, South Beach, Miami Beach ,9.9," American, Seafood"
29+
Thai Diner,$$, Nolita, New York ,9.9, Thai
30+
The Salt House,$$, Prallsville, New Hope ,9.9, Gastropub
31+
Minetta Tavern,$$$, South Village, New York ,9.9," American, Burgers, Steakhouse"
32+
Carbone,$$$, South Village, New York ,9.9, Italian
33+
Murph's Bar,$$, Fishtown, Philadelphia ,9.9," Bar, Irish, Italian"
34+
Zahav,$$$, Society Hill, Philadelphia ,9.9, Israeli
35+
Prince's Hot Chicken Shack South,$, Townhomes of Shadow Glen, Nashville ,9.9," American, Southern"
36+
Joe's Steaks + Soda Shop,$, Wissinoming, Philadelphia ,9.9,
37+
Suraya Restaurant,$$, North Philadelphia, Philadelphia ,9.9," Lebanese, Middle Eastern"
38+
Dalessandro's Steaks,$, Philadelphia ,,9.9, Sandwiches
39+
Emily,$$, Clinton Hill, New York ,9.8," Burgers, Pizza"
40+
Arnold’s Lobster & Clam Bar,$$, North Eastham, Eastham ,9.8," Ice Cream, Seafood"
41+
Pizzeria Beddia,$$, Fishtown, Philadelphia ,9.8, Pizza
42+
Frank Pepe Pizzeria Napoletana,$$, Wooster Square, New Haven ,9.8, Pizza
43+
Honey,$$$, Doylestown ,,9.8,
44+
Henlopen City Oyster House,$$$, Rehoboth Beach ,,9.8, Seafood
45+
South Philly Barbacoa,$, Passyunk Square, Philadelphia ,9.8," Mexican, Taqueria"
46+
L'Industrie Pizzeria,$, Williamsburg, New York ,9.8, Pizza
47+
Alpen Rose,, Midtown Village, Philadelphia ,9.8,Steakhouse
48+
Golden Diner,$$, Two Bridges, New York ,9.8," American, Diner"
49+
Little Fish BYOB,$$$, Bella Vista, Philadelphia ,9.8," American, Seafood"
50+
Laurel Restaurant,$$$$, East Passyunk Crossing, Philadelphia ,9.8," American, French"
51+
Neptune Oyster,$$$, North End, Boston ,9.8," American, New England, Seafood"

Selenium/BeliApp/requirements.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
async-generator==1.10
2+
attrs==22.1.0
3+
beautifulsoup4==4.11.1
4+
certifi==2022.9.24
5+
cffi==1.15.1
6+
charset-normalizer==2.1.1
7+
colorama==0.4.6
8+
exceptiongroup==1.0.0
9+
h11==0.14.0
10+
idna==3.4
11+
numpy==1.23.4
12+
outcome==1.2.0
13+
packaging==21.3
14+
pandas==1.5.1
15+
pycparser==2.21
16+
pyparsing==3.0.9
17+
PySocks==1.7.1
18+
python-dateutil==2.8.2
19+
python-dotenv==0.21.0
20+
pytz==2022.6
21+
requests==2.28.1
22+
selenium==4.5.0
23+
six==1.16.0
24+
sniffio==1.3.0
25+
sortedcontainers==2.4.0
26+
soupsieve==2.3.2.post1
27+
tqdm==4.64.1
28+
trio==0.22.0
29+
trio-websocket==0.9.2
30+
urllib3==1.26.12
31+
webdriver-manager==3.8.4
32+
wsproto==1.2.0

Selenium/BeliApp/soup.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import pandas as pd
2+
from bs4 import BeautifulSoup
3+
4+
# Parse a locally-saved copy of the Beli list page and export it to CSV.
with open("Beli.html") as fp:
    soup = BeautifulSoup(fp, 'html.parser')

restaurant_list = []
rows = soup.find_all('ion-grid')
for row in rows:
    # Every column pre-seeded (including 'categories', which the original
    # set only in some branches) so each row dict has identical keys and
    # the CSV header/column order is stable.
    mapped = {
        'restaurant_name': '',
        'dollar_sign': '',
        'neighborhood': '',
        'city': '',
        'rating': '',
        'categories': '',
    }
    paras = row.findAll('p')
    # First <p> starts with a rank prefix (e.g. "1.   "); drop 5 chars.
    mapped['restaurant_name'] = paras[0].get_text()[5:].strip()
    categories = paras[1].get_text()
    if '|' in categories:
        # "price | categories" form.
        categories = categories.split('|')
        mapped['dollar_sign'] = categories[0].strip()
        mapped['categories'] = categories[1]
    elif '$' in categories:
        mapped['dollar_sign'] = categories.strip()
    else:
        mapped['categories'] = categories

    # Third <p> is "neighborhood, city"; city may be absent.  The original
    # had a redundant len == 1 branch duplicating the else's first line.
    address = paras[2].get_text().split(',')
    mapped['neighborhood'] = address[0]
    if len(address) > 1:
        mapped['city'] = address[1]

    mapped['rating'] = row.strong.get_text()
    restaurant_list.append(mapped)
    print(mapped, end='\n\n')


df = pd.DataFrame(restaurant_list)
df.to_csv('beli_file.csv', encoding='utf-8', index=False, doublequote=True)
43+

0 commit comments

Comments
 (0)