forked from crawlbase/proxycrawl-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscreenshots_api.py
More file actions
40 lines (34 loc) · 1.53 KB
/
screenshots_api.py
File metadata and controls
40 lines (34 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import uuid, re, os, tempfile
from proxycrawl.base_api import BaseAPI
#
# A Python class that acts as wrapper for ProxyCrawl Screenshots API.
#
# Read ProxyCrawl API documentation https://proxycrawl.com/docs/screenshots-api/
#
# Copyright ProxyCrawl
# Licensed under the Apache License 2.0
#
class ScreenshotsAPI(BaseAPI):
    """Wrapper around the ProxyCrawl Screenshots API.

    ``get`` requests a screenshot of a URL, writes the response body to a
    JPEG file on disk and returns the response with the file path added
    under the ``'file'`` key. Only GET is supported; ``post`` always raises.
    """

    base_path = 'screenshots'

    def get(self, url, options=None):
        """Request a screenshot of ``url`` and save it to disk.

        Args:
            url: The page to capture.
            options: Optional dict of request options. The special key
                ``'save_to_path'`` selects the output file and is not
                forwarded with the request; it must end with ``.jpg`` or
                ``.jpeg`` (all-lowercase or all-uppercase). When absent, a
                uniquely named ``.jpg`` in the system temp directory is
                used. The dict passed in is never modified.

        Returns:
            The response returned by ``self.request`` with an extra
            ``'file'`` entry holding the path the body was written to.

        Raises:
            Exception: If the chosen path does not end with an accepted
                JPEG extension.
        """
        # Work on a private copy: the original signature used a mutable
        # default ({}) and mutated it, which leaked state between calls and
        # silently altered the caller's dict (dropping 'save_to_path' and
        # injecting 'url').
        request_options = {} if options is None else dict(options)

        if 'save_to_path' in request_options:
            screenshot_path = request_options.pop('save_to_path')
        else:
            screenshot_path = self.__generateFilepath()

        if not re.match(r".+\.(jpg|JPG|jpeg|JPEG)$", screenshot_path):
            raise Exception('save_to_path must end with .jpg or .jpeg')

        request_options['url'] = url
        # NOTE(review): request() is not defined in this class; presumably
        # inherited from BaseAPI -- confirm its contract there.
        response = self.request(request_options)

        # The body is written in binary mode, so it is expected to be bytes.
        with open(screenshot_path, 'wb') as f:
            f.write(response['body'])

        response['file'] = screenshot_path
        return response

    def post(self, url, data, options=None):
        """Always raise: the Screenshots API only supports GET.

        Raises:
            Exception: Unconditionally.
        """
        raise Exception('Only GET is allowed on the Screenshots API')

    def parseRegularResponse(self, handler):
        """Run the base parsing, then copy screenshot headers into the response.

        Adds ``success``, ``remaining_requests`` and ``screenshot_url`` to
        ``self.response['headers']``, each converted with ``str()`` (so a
        missing header is stored as the string ``'None'``).

        Args:
            handler: Response object exposing a ``headers`` mapping with a
                ``get`` method.
        """
        headers = handler.headers
        # NOTE(review): self.response['headers'] is presumably populated by
        # the base implementation -- confirm in BaseAPI.
        BaseAPI.parseRegularResponse(self, handler)
        response_headers = self.response['headers']
        response_headers['success'] = str(headers.get('success'))
        response_headers['remaining_requests'] = str(headers.get('remaining_requests'))
        response_headers['screenshot_url'] = str(headers.get('screenshot_url'))

    def __generateFilename(self):
        """Return a random ``<uuid4>.jpg`` file name."""
        return str(uuid.uuid4()) + '.jpg'

    def __generateFilepath(self):
        """Return a path in the system temp directory with a generated name."""
        return os.path.join(tempfile.gettempdir(), self.__generateFilename())