-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindeed.py
More file actions
85 lines (64 loc) · 2.28 KB
/
indeed.py
File metadata and controls
85 lines (64 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
# Scrape Indeed UK search results for "wind technician" jobs around Essex and
# save one row per job posting to ``indeed.csv``.

# Result offsets substituted into the search URL's ``start`` parameter.
# NOTE(review): offset 0 (the first results page) is not in this list, so it
# is never scraped -- confirm that is intended.
pages = [10, 20, 30, 40, 50, 60, 70]

BASE_URL = 'https://www.indeed.co.uk'
# NOTE(review): assumes Indeed paginates through the ``start`` query
# parameter. Previously the URL had no placeholder, so ``.format(page)`` was a
# no-op and every iteration downloaded the same first page.
SEARCH_URL = BASE_URL + '/jobs?q=wind+technician&l=essex&radius=100&start={}'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the run
DETAIL_DELAY = 0.5  # seconds to pause after each job so the site is not hammered
OUTPUT_FILE = 'indeed.csv'
COLUMNS = ['Title', 'Company', 'Location', 'Salary', 'Description']


def build_search_url(start):
    """Return the search URL for the results page beginning at offset *start*."""
    return SEARCH_URL.format(start)


def absolute_link(href):
    """Return *href* as an absolute URL, prefixing the site root if needed.

    Only a leading scheme marks a link as absolute; a relative link that
    merely contains "http" somewhere (e.g. in a redirect parameter) is still
    prefixed.
    """
    if href.startswith(('http://', 'https://')):
        return href
    return BASE_URL + href


def fetch_soup(url, parser):
    """Download *url* and parse it with *parser*.

    Returns the parsed ``BeautifulSoup`` document, or ``None`` when the
    request fails, so one bad page does not discard rows already collected.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('Request failed for {}: {}'.format(url, exc))
        return None
    return BeautifulSoup(response.content, parser)


def text_or_none(parent, name, **attrs):
    """Return the stripped text of the first matching tag, or ``None``."""
    node = parent.find(name, **attrs)
    if node is None:
        return None
    return node.text.strip()


def job_link(job):
    """Return the absolute URL of the job's first anchor, or ``None``."""
    anchor = job.a
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return None
    return absolute_link(href)


def fetch_description(link):
    """Return the inner HTML of the job description at *link*, or ``None``."""
    if link is None:
        return None
    detail = fetch_soup(link, 'html.parser')
    if detail is None:
        return None
    node = detail.find('div', id='jobDescriptionText')
    if node is None:
        return None
    return node.decode_contents(formatter='html')


def parse_job(job):
    """Extract one CSV row (a dict keyed by ``COLUMNS``) from a result card.

    Fields missing from the card are recorded as ``None``. Each field is
    printed as it is extracted to show progress.
    """
    title = text_or_none(job, 'a', rel='noopener')
    print('Title:', title)
    company = text_or_none(job, 'span', class_='company')
    print('Company:', company)
    location = text_or_none(job, 'span', class_='location')
    print('Location:', location)
    salary = text_or_none(job, 'span', class_='no-wrap')
    print('Salary:', salary)

    link = job_link(job)
    print('Link:', link)
    job_description = fetch_description(link)
    print('job_description:', job_description)

    return {
        'Title': title,
        'Company': company,
        'Location': location,
        'Salary': salary,
        'Description': job_description,
    }


def scrape(offsets):
    """Return a list of job rows collected from every offset in *offsets*."""
    rows = []
    for offset in offsets:
        listing = fetch_soup(build_search_url(offset), 'lxml')
        if listing is None:
            continue
        for job in listing.find_all(class_='result'):
            rows.append(parse_job(job))
            print('--------')
            time.sleep(DETAIL_DELAY)
    return rows


def main():
    """Scrape all configured pages and write the results to ``indeed.csv``."""
    # Passing ``columns`` keeps the header row even when nothing was scraped.
    df = pd.DataFrame(scrape(pages), columns=COLUMNS)
    df.to_csv(OUTPUT_FILE, index=False)


if __name__ == '__main__':
    main()