Skip to content

Commit a3f086c

Browse files
Update BookShelfScrape v1.1
1 parent 87ba790 commit a3f086c

1 file changed

Lines changed: 3 additions & 2 deletions

File tree

BookShelfScrape v1.1

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ with requests.Session() as s:
6161
for span_tag in soupReading.findAll('span'):
6262
span_tag.replace_with('')
6363
title = subReading['title']
64+
title = " ".join(re.findall("[a-zA-Z0-9.]+", title))
6465
print('Now Scraping Reading: %s' % title)
6566
# Grabbing all of the blue boxes which are denoted by "figure" in the HTML
6667
# Trying to also save them off as word documents, with both text and tables
@@ -69,11 +70,11 @@ with requests.Session() as s:
6970
readingFile = open('%s.html' % title, 'wb')
7071
for figure in figures:
7172
for image_tag in figure.findAll('img'):
72-
image_tag['src'] = 'https://jigsaw.vitalsource.com/books/' + str(book['isbn']) + '/epub' + image_tag['src']
73+
image_tag['src'] = 'https://jigsaw.vitalsource.com/books/' + str(book['isbn']) + '/epub/OEBPS/' + image_tag['src']
7374
readingFile.write(figure.encode('UTF-8'))
7475
readingFile.close()
7576
else:
7677
continue
77-
print('Done! Enjoy!')
78+
print('Done! Enjoy!')
7879

7980

0 commit comments

Comments
 (0)