-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetchWeb.py
More file actions
29 lines (24 loc) · 973 Bytes
/
fetchWeb.py
File metadata and controls
29 lines (24 loc) · 973 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from bs4 import BeautifulSoup
import urllib
# Fetch one Craigslist apartment posting, parse it, and collect for every
# matched element its link text (used as the key), link target and date into
# the module-level ``lobbying`` dict:
#     {link_text: {"link": <prefix + href or None>, "date": <text or None>}}
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2 (the API this script was written for)

# Alternative source kept for reference -- the search-results page, matched
# with soup.find_all("li", class_="result-row"):
#   https://indianapolis.craigslist.org/search/apa?max_price=700&availabilityMode=0
# closing() releases the HTTP response on both Python 2 and Python 3; the
# Python 2 response object is not a context manager on its own.
with closing(
    urlopen('https://indianapolis.craigslist.org/apa/5846927650.html')
) as response:
    r = response.read()
soup = BeautifulSoup(r, "html.parser")
letters = soup.find_all("section", id="postingbody")
# Debug output of the matched elements. The single-argument parenthesised
# form prints the same text under Python 2 and Python 3.
print(letters)

prefix = "indianapolis.craigslist.org"
lobbying = {}
for element in letters:
    anchor = element.a
    if anchor is None:
        # Without an anchor there is no link text to key the entry on.
        continue
    href = anchor.get("href")
    # NOTE(review): "result-date" presumably belongs to the search-result
    # rows rather than to a posting body -- confirm. A missing tag is
    # recorded as None instead of raising AttributeError.
    date_tag = element.find(class_="result-date")
    lobbying[anchor.get_text()] = {
        "link": prefix + href if href is not None else None,
        "date": date_tag.get_text() if date_tag is not None else None,
    }
print(letters)
# To inspect the collected data:
# for item in lobbying:
#     print(item, lobbying[item]["date"], lobbying[item]["link"])