@@ -55,7 +55,9 @@ def user_choice(message, default=False):
5555 print ("Please answer with y (yes) or n (no)" )
5656
5757
58- def download_file (url , chunk_size = 1024 , timeout = None , progress_function = None , verify = True , interactive = True ):
58+ def download_file (
59+ url , chunk_size = 1024 , timeout = None , progress_function = None ,
60+ verify = True ):
5961 """! Download a file using GET over HTTP.
6062
6163 @param url Full url of the file to download.
@@ -67,19 +69,22 @@ def download_file(url, chunk_size=1024, timeout=None, progress_function=None, ve
6769 @param timeout Timeout in seconds for the GET request.
6870 @param progress_function Function called regularly, with the current
6971 download progress in [0,1] as a float argument.
70- @param interactive bool. Whether to ask for user input. If False, raises Errors instead.
72+ @param verify bool or "interactive". If False, ignores the connection's
73+ security. If True, only downloads from secure connections are made,
74+ and insecure connections raise a FileNotFoundError. If "interactive",
75+ prompts the user whether or not to allow insecure connections.
7176 @return File as BytesIO
7277 """
73- if verify == False :
74- # suppress this warning since the insecure requests is intentional
75- urllib3 .disable_warnings (urllib3 .exceptions .InsecureRequestWarning )
7678 # send GET request as stream so the content is not downloaded at once
7779 try :
78- req = requests .get (url , stream = True , timeout = timeout , verify = verify )
80+ req = requests .get (
81+ url , stream = True , timeout = timeout ,
82+ verify = verify == True or verify == "interactive" )
7983 except OSError :
80- if interactive and user_choice (url + " could not be opened due to an "
81- "unsecure connection. Do you want to "
82- "open it anyways?\n " ):
84+ if verify == "interactive" and user_choice (
85+ url +
86+ " could not be opened due to an insecure connection. "
87+ "Do you want to open it anyways?\n " ):
8388 urllib3 .disable_warnings (urllib3 .exceptions .InsecureRequestWarning )
8489 req = requests .get (url , stream = True , timeout = timeout , verify = False )
8590 else :
@@ -132,12 +137,15 @@ def extract_zip(file, **param_dict):
132137 return all_dfs
133138
134139
135- def get_file (filepath = '' , url = '' , read_data = dd .defaultDict ['read_data' ], param_dict = {}, interactive = True ):
140+ def get_file (
141+ filepath = '' , url = '' , read_data = dd .defaultDict ['read_data' ],
142+ param_dict = {},
143+ interactive = True ):
136144 """! Loads data from filepath and stores it in a pandas dataframe.
137- If data can't be read from given filepath the user is asked wether the file should be downloaded from the given url or not.
145+ If data can't be read from given filepath the user is asked whether the file should be downloaded from the given url or not.
138146 Uses the progress indicator to give feedback.
139147
140- @param filepath String. Filepath where data es read from .
148+ @param filepath String. Filepath from where the data is read.
141149 @param url String. URL to download the dataset.
142150 @param read_data True or False. Defines if item is opened from directory (True) or downloaded (False).
143151 @param param_dict Dict. Additional information for download functions (e.g. engine, sheet_name, header...)
@@ -149,18 +157,22 @@ def get_file(filepath='', url='', read_data=dd.defaultDict['read_data'], param_d
149157 param_dict_csv = {"sep" : ',' , "header" : 0 , "encoding" : None , 'dtype' : None }
150158 param_dict_zip = {}
151159
152- filetype_dict = {'text' : pd .read_csv , 'Composite Document File V2 Document' : pd .read_excel ,
153- 'Excel' : pd .read_excel , 'Zip' : extract_zip }
154- param_dict_dict = {pd .read_csv : param_dict_csv ,
155- pd .read_excel : param_dict_excel , extract_zip : param_dict_zip }
160+ filetype_dict = {
161+ 'text' : pd .read_csv ,
162+ 'Composite Document File V2 Document' : pd .read_excel ,
163+ 'Excel' : pd .read_excel , 'Zip' : extract_zip }
164+ param_dict_dict = {
165+ pd .read_csv : param_dict_csv , pd .read_excel : param_dict_excel ,
166+ extract_zip : param_dict_zip }
156167
157168 if read_data :
158169 try :
159170 df = pd .read_json (filepath )
160171 except FileNotFoundError :
161- if interactive and user_choice ("Warning: The file: " + filepath +
162- " does not exist in the directory. Do you want to download "
163- "the file from " + url + " instead?\n " ):
172+ if interactive and user_choice (
173+ "Warning: The file: " + filepath +
174+ " does not exist in the directory. Do you want to download "
175+ "the file from " + url + " instead?\n " ):
164176 df = get_file (filepath = filepath , url = url ,
165177 read_data = False , param_dict = {})
166178 else :
@@ -173,14 +185,16 @@ def get_file(filepath='', url='', read_data=dd.defaultDict['read_data'], param_d
173185 try : # to download file from url and show download progress
174186 with progress_indicator .Percentage (message = "Downloading " + url ) as p :
175187 file = download_file (
176- url , 1024 , None , p .set_progress , interactive = interactive )
188+ url , 1024 , None , p .set_progress ,
189+ verify = "interactive" if interactive else True )
177190 # read first 2048 bytes to find file type
178191 ftype = magic .from_buffer (file .read (2048 ))
179192 # set pointer back to starting position
180193 file .seek (0 )
181194 # find file type in dict and use function to read
182- func_to_use = [val for key ,
183- val in filetype_dict .items () if key in ftype ]
195+ func_to_use = [
196+ val for key , val in filetype_dict .items ()
197+ if key in ftype ]
184198 # use different default dict for different functions
185199 dict_to_use = param_dict_dict [func_to_use [0 ]]
186200 # adjust dict
@@ -330,7 +344,9 @@ def cli(what):
330344 return vars (args )
331345
332346
333- def append_filename (filename = '' , impute_dates = False , moving_average = 0 , split_berlin = False , rep_date = False ):
347+ def append_filename (
348+ filename = '' , impute_dates = False , moving_average = 0 , split_berlin = False ,
349+ rep_date = False ):
334350 """! Creates consistent file names for all output.
335351 """
336352 # split_berlin and repdate especially for case data
0 commit comments