File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -61,6 +61,7 @@ with requests.Session() as s:
6161 for span_tag in soupReading.findAll('span'):
6262 span_tag.replace_with('')
6363 title = subReading['title']
64+ title = " ".join(re.findall("[a-zA-Z0-9.]+", title))
6465 print('Now Scraping Reading: %s' % title)
6566 # Grabbing all of the blue boxes which are denoted by "figure" in the HTML
6667 # Trying to also save them off as word documents, with both text and tables
@@ -69,11 +70,11 @@ with requests.Session() as s:
6970 readingFile = open('%s.html' % title, 'wb')
7071 for figure in figures:
7172 for image_tag in figure.findAll('img'):
72- image_tag['src'] = 'https://jigsaw.vitalsource.com/books/' + str(book['isbn']) + '/epub' + image_tag['src']
73+ image_tag['src'] = 'https://jigsaw.vitalsource.com/books/' + str(book['isbn']) + '/epub/OEBPS/ ' + image_tag['src']
7374 readingFile.write(figure.encode('UTF-8'))
7475 readingFile.close()
7576 else:
7677 continue
77- print('Done! Enjoy!')
78+ print('Done! Enjoy!')
7879
7980
You can’t perform that action at this time.
0 commit comments