Skip to content

Commit 8e2db96

Browse files
committed
fix formatting and comments
1 parent 01314ed commit 8e2db96

13 files changed

+296
-179
lines changed

pycode/examples/epidata/ProgressIndicator.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,33 @@
1717
# See the License for the specific language governing permissions and
1818
# limitations under the License.
1919
#############################################################################
20-
"""@ProgressIndicator.py
21-
WARNING: This file is currently not tested and maintained.
22-
"""
20+
2321
from memilio import progress_indicator
2422
import time
2523

2624
print("This is only a usage example, and does not actually do anything.")
25+
# Also, the following values for delay, sleep etc. are chosen arbitrarily,
26+
# and have no further relevancy other than to demonstrate the indicator.
27+
2728
# using start/stop
2829
p = progress_indicator.Dots(message="waiting", delay=0.5)
2930
p.start()
3031
time.sleep(1.6)
3132
p.stop()
32-
# using with as block
33+
34+
# using with-as block
3335
with progress_indicator.Percentage(message="download 1", delay=0.4) as p:
3436
for i in range(13):
3537
time.sleep(0.1467)
36-
p.set_progress((i+1)/13)
38+
p.set_progress((i + 1) / 13)
39+
3740
with progress_indicator.Percentage(message="download 2", use_bar=False,
38-
delay=0, keep_output=False) as p:
41+
delay=0, keep_output=False) as p:
3942
for i in range(97):
4043
time.sleep(0.0367)
41-
p.set_progress((i+1)/97)
42-
# using with block ('as' is not usefull without Percentage)
44+
p.set_progress((i + 1) / 97)
45+
46+
# using with block
47+
# the 'as' is only required for calling e.g. message() or set_progress()
4348
with progress_indicator.Spinner(message="finish"):
44-
time.sleep(2)
49+
time.sleep(2)

pycode/memilio-epidata/memilio/epidata/getCaseData.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -327,15 +327,15 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
327327
if dict_files[file][2] == None:
328328
df_local_cs = df_local.cumsum().reset_index(drop=False)
329329
else:
330-
df_local_cs = df_local.groupby(
331-
level=[dict_files[file][0].index(level_index) for level_index in dict_files[file][2]]).cumsum().reset_index()
330+
df_local_cs = df_local.groupby(level=[dict_files[file][0].index(
331+
level_index) for level_index in dict_files[file][2]]).cumsum().reset_index()
332332

333333
if impute_dates or moving_average > 0:
334-
df_local_cs = mdfs.impute_and_reduce_df(df_local_cs,
335-
group_by_cols=dict_files[file][3],
336-
mod_cols=dict_files[file][4],
337-
impute='forward', moving_average=moving_average,
338-
min_date=start_date, max_date=end_date)
334+
df_local_cs = mdfs.impute_and_reduce_df(
335+
df_local_cs, group_by_cols=dict_files[file][3],
336+
mod_cols=dict_files[file][4],
337+
impute='forward', moving_average=moving_average,
338+
min_date=start_date, max_date=end_date)
339339

340340
df_local_cs = mdfs.extract_subframe_based_on_dates(
341341
df_local_cs, start_date, end_date)
@@ -362,10 +362,11 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
362362
plt.show()
363363

364364
if file == 'all_gender':
365-
df.groupby(Geschlecht) \
366-
.agg({AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}) \
367-
.plot(title='COVID-19 infections, deaths, recovered', grid=True,
368-
kind='bar')
365+
df.groupby(Geschlecht).agg(
366+
{AnzahlFall: sum, AnzahlTodesfall: sum,
367+
AnzahlGenesen: sum}).plot(
368+
title='COVID-19 infections, deaths, recovered',
369+
grid=True, kind='bar')
369370
plt.tight_layout()
370371
plt.show()
371372

pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,9 @@ def get_case_data_with_estimations(
8484
rep_date)
8585

8686
# get data from Johns Hopkins University
87-
gjd.get_jh_data(read_data, file_format, out_folder, no_raw,
88-
start_date, end_date, impute_dates, moving_average, make_plot_jh)
87+
gjd.get_jh_data(
88+
read_data, file_format, out_folder, no_raw, start_date, end_date,
89+
impute_dates, moving_average, make_plot_jh)
8990

9091
# Now we know which data is generated and we can use it
9192
# read in jh data

pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
183183
govkey2numlist = collections.OrderedDict(
184184
zip(govkey_list, list(range(0, len(govkey_list)))))
185185

186-
(countykey2govkey, countykey2localnumlist, gov_county_table, state_gov_table) = assign_geographical_entities(
187-
countykey_list, govkey_list)
186+
(countykey2govkey, countykey2localnumlist, gov_county_table,
187+
state_gov_table) = assign_geographical_entities(countykey_list, govkey_list)
188188

189189
mat_commuter_migration = np.zeros(
190190
[len(countykey_list), len(countykey_list)])
@@ -203,7 +203,6 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
203203
n = 0
204204

205205
for item in files:
206-
207206
# Using the 'Einpendler' sheet to correctly distribute summed values over counties of other gov. region
208207
# This file is in a zip folder so it has to be unzipped first before it can be read.
209208
param_dict = {"sheet_name": 3, "engine": "pyxlsb"}
@@ -233,8 +232,8 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
233232
curr_county_migratedto = commuter_migration_file.iloc[i][1]
234233
current_key = commuter_migration_file.iloc[i][0]
235234
# migration to itself excluded!
236-
counties_migratedfrom[countykey2govkey[current_key]
237-
][countykey2localnumlist[current_key]] = 1
235+
counties_migratedfrom[countykey2govkey[current_key]][
236+
countykey2localnumlist[current_key]] = 1
238237

239238
if not isinstance(commuter_migration_file.iloc[i][2], float):
240239
# removal of nan's, regional keys are stored as strings
@@ -249,8 +248,8 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
249248
val = commuter_migration_file.iloc[i][4]
250249
mat_commuter_migration[current_row, current_col] = val
251250
checksum += val
252-
counties_migratedfrom[countykey2govkey[commuter_migration_file.iloc[i][2]]][
253-
countykey2localnumlist[commuter_migration_file.iloc[i][2]]] = 1
251+
counties_migratedfrom[countykey2govkey[commuter_migration_file.iloc[i][2]]
252+
][countykey2localnumlist[commuter_migration_file.iloc[i][2]]] = 1
254253

255254
# take summed values of other REMAINING counties of government region
256255
# here, some counties of the region are stated explicitly and the rest is summed
@@ -265,15 +264,23 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
265264
# sum population of all counties not explicitly migrated from
266265
# of the current gov region migrated from
267266
dummy_pop_sum = 0
268-
for k in range(0, len(gov_county_table[govkey2numlist[dummy_key_wozeros]])):
267+
for k in range(
268+
0,
269+
len(
270+
gov_county_table
271+
[govkey2numlist[dummy_key_wozeros]])):
269272
if counties_migratedfrom[govkey2numlist[dummy_key_wozeros]][k] < 1:
270273
# get identifier (0-401) for county key
271274
globindex = countykey2numlist[gov_county_table[govkey2numlist[dummy_key_wozeros]][k]]
272275
# sum up
273276
dummy_pop_sum += countypop_list[globindex]
274277

275278
# distribute emigration relative to county population where migration comes from
276-
for k in range(0, len(gov_county_table[govkey2numlist[dummy_key_wozeros]])):
279+
for k in range(
280+
0,
281+
len(
282+
gov_county_table
283+
[govkey2numlist[dummy_key_wozeros]])):
277284
if counties_migratedfrom[govkey2numlist[dummy_key_wozeros]][k] < 1:
278285
# get identifier (0-401) for county key
279286
globindex = countykey2numlist[gov_county_table[govkey2numlist[dummy_key_wozeros]][k]]
@@ -294,20 +301,32 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
294301
# sum population of all counties not explicitly migrated to
295302
# of the current gov region migrated to
296303
dummy_pop_sum = 0
297-
for k in range(0, len(gov_county_table[govkey2numlist[commuter_migration_file.iloc[i][2]]])):
304+
for k in range(
305+
0,
306+
len(
307+
gov_county_table
308+
[
309+
govkey2numlist
310+
[commuter_migration_file.iloc[i][2]]])):
298311
if counties_migratedfrom[govkey2numlist[commuter_migration_file.iloc[i][2]]][k] < 1:
299312
# get identifier (0-401) for county key
300-
globindex = countykey2numlist[gov_county_table[govkey2numlist[
301-
commuter_migration_file.iloc[i][2]]][k]]
313+
globindex = countykey2numlist[gov_county_table[
314+
govkey2numlist[commuter_migration_file.iloc[i][2]]][k]]
302315
# sum up
303316
dummy_pop_sum += countypop_list[globindex]
304317

305318
# distribute emigration relative to county population where migration comes from
306-
for k in range(0, len(gov_county_table[govkey2numlist[commuter_migration_file.iloc[i][2]]])):
319+
for k in range(
320+
0,
321+
len(
322+
gov_county_table
323+
[
324+
govkey2numlist
325+
[commuter_migration_file.iloc[i][2]]])):
307326
if counties_migratedfrom[govkey2numlist[commuter_migration_file.iloc[i][2]]][k] < 1:
308327
# get identifier (0-401) for county key
309-
globindex = countykey2numlist[gov_county_table[govkey2numlist[
310-
commuter_migration_file.iloc[i][2]]][k]]
328+
globindex = countykey2numlist[gov_county_table[
329+
govkey2numlist[commuter_migration_file.iloc[i][2]]][k]]
311330
counties_migratedfrom[govkey2numlist[commuter_migration_file.iloc[i][2]]][k] = 1
312331

313332
# set value computed relative to county size and effective migration
@@ -567,8 +586,9 @@ def main():
567586

568587
arg_dict_commuter = {**arg_dict, "setup_dict": setup_dict}
569588

570-
get_neighbors_mobility(1001, abs_tol=0, rel_tol=0, tol_comb='or',
571-
merge_eisenach=True, out_folder=dd.defaultDict['out_folder'])
589+
get_neighbors_mobility(
590+
1001, abs_tol=0, rel_tol=0, tol_comb='or', merge_eisenach=True,
591+
out_folder=dd.defaultDict['out_folder'])
572592

573593
get_commuter_data(**arg_dict_commuter)
574594

pycode/memilio-epidata/memilio/epidata/getDIVIData.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def get_divi_data(read_data=dd.defaultDict['read_data'],
8686
# First csv data on 24-04-2020
8787
if start_date < date(2020, 4, 24):
8888
print("Warning: First data available on 2020-04-24. "
89-
"You asked for " + start_date.strftime("%Y-%m-%d") + ". Changed it to 2020-04-24.")
89+
"You asked for " + start_date.strftime("%Y-%m-%d") +
90+
". Changed it to 2020-04-24.")
9091
start_date = date(2020, 4, 24)
9192

9293
directory = os.path.join(out_folder, 'Germany/')

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ def user_choice(message, default=False):
5555
print("Please answer with y (yes) or n (no)")
5656

5757

58-
def download_file(url, chunk_size=1024, timeout=None, progress_function=None, verify=True, interactive=True):
58+
def download_file(
59+
url, chunk_size=1024, timeout=None, progress_function=None,
60+
verify=True):
5961
"""! Download a file using GET over HTTP.
6062
6163
@param url Full url of the file to download.
@@ -67,19 +69,22 @@ def download_file(url, chunk_size=1024, timeout=None, progress_function=None, ve
6769
@param timeout Timeout in seconds for the GET request.
6870
@param progress_function Function called regularly, with the current
6971
download progress in [0,1] as a float argument.
70-
@param interactive bool. Whether to ask for user input. If False, raises Errors instead.
72+
@param verify bool or "interactive". If False, ignores the connection's
73+
security. If True, only downloads from secure connections are made,
74+
and insecure connections raise a FileNotFoundError. If "interactive",
75+
prompts the user whether or not to allow insecure connections.
7176
@return File as BytesIO
7277
"""
73-
if verify == False:
74-
# suppress this warning since the insecure requests is intentional
75-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
7678
# send GET request as stream so the content is not downloaded at once
7779
try:
78-
req = requests.get(url, stream=True, timeout=timeout, verify=verify)
80+
req = requests.get(
81+
url, stream=True, timeout=timeout,
82+
verify=verify == True or verify == "interactive")
7983
except OSError:
80-
if interactive and user_choice(url + " could not be opened due to an "
81-
"unsecure connection. Do you want to "
82-
"open it anyways?\n"):
84+
if verify == "interactive" and user_choice(
85+
url +
86+
" could not be opened due to an insecure connection. "
87+
"Do you want to open it anyways?\n"):
8388
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
8489
req = requests.get(url, stream=True, timeout=timeout, verify=False)
8590
else:
@@ -132,12 +137,15 @@ def extract_zip(file, **param_dict):
132137
return all_dfs
133138

134139

135-
def get_file(filepath='', url='', read_data=dd.defaultDict['read_data'], param_dict={}, interactive=True):
140+
def get_file(
141+
filepath='', url='', read_data=dd.defaultDict['read_data'],
142+
param_dict={},
143+
interactive=True):
136144
"""! Loads data from filepath and stores it in a pandas dataframe.
137-
If data can't be read from given filepath the user is asked wether the file should be downloaded from the given url or not.
145+
If data can't be read from given filepath the user is asked whether the file should be downloaded from the given url or not.
138146
Uses the progress indicator to give feedback.
139147
140-
@param filepath String. Filepath where data es read from.
148+
@param filepath String. Filepath from where the data is read.
141149
@param url String. URL to download the dataset.
142150
@param read_data True or False. Defines if item is opened from directory (True) or downloaded (False).
143151
@param param_dict Dict. Additional information for download functions (e.g. engine, sheet_name, header...)
@@ -149,18 +157,22 @@ def get_file(filepath='', url='', read_data=dd.defaultDict['read_data'], param_d
149157
param_dict_csv = {"sep": ',', "header": 0, "encoding": None, 'dtype': None}
150158
param_dict_zip = {}
151159

152-
filetype_dict = {'text': pd.read_csv, 'Composite Document File V2 Document': pd.read_excel,
153-
'Excel': pd.read_excel, 'Zip': extract_zip}
154-
param_dict_dict = {pd.read_csv: param_dict_csv,
155-
pd.read_excel: param_dict_excel, extract_zip: param_dict_zip}
160+
filetype_dict = {
161+
'text': pd.read_csv,
162+
'Composite Document File V2 Document': pd.read_excel,
163+
'Excel': pd.read_excel, 'Zip': extract_zip}
164+
param_dict_dict = {
165+
pd.read_csv: param_dict_csv, pd.read_excel: param_dict_excel,
166+
extract_zip: param_dict_zip}
156167

157168
if read_data:
158169
try:
159170
df = pd.read_json(filepath)
160171
except FileNotFoundError:
161-
if interactive and user_choice("Warning: The file: " + filepath +
162-
" does not exist in the directory. Do you want to download "
163-
"the file from " + url + " instead?\n"):
172+
if interactive and user_choice(
173+
"Warning: The file: " + filepath +
174+
" does not exist in the directory. Do you want to download "
175+
"the file from " + url + " instead?\n"):
164176
df = get_file(filepath=filepath, url=url,
165177
read_data=False, param_dict={})
166178
else:
@@ -173,14 +185,16 @@ def get_file(filepath='', url='', read_data=dd.defaultDict['read_data'], param_d
173185
try: # to download file from url and show download progress
174186
with progress_indicator.Percentage(message="Downloading " + url) as p:
175187
file = download_file(
176-
url, 1024, None, p.set_progress, interactive=interactive)
188+
url, 1024, None, p.set_progress,
189+
verify="interactive" if interactive else True)
177190
# read first 2048 bytes to find file type
178191
ftype = magic.from_buffer(file.read(2048))
179192
# set pointer back to starting position
180193
file.seek(0)
181194
# find file type in dict and use function to read
182-
func_to_use = [val for key,
183-
val in filetype_dict.items() if key in ftype]
195+
func_to_use = [
196+
val for key, val in filetype_dict.items()
197+
if key in ftype]
184198
# use different default dict for different functions
185199
dict_to_use = param_dict_dict[func_to_use[0]]
186200
# adjust dict
@@ -330,7 +344,9 @@ def cli(what):
330344
return vars(args)
331345

332346

333-
def append_filename(filename='', impute_dates=False, moving_average=0, split_berlin=False, rep_date=False):
347+
def append_filename(
348+
filename='', impute_dates=False, moving_average=0, split_berlin=False,
349+
rep_date=False):
334350
"""! Creates consistent file names for all output.
335351
"""
336352
# split_berlin and rep_date especially for case data

0 commit comments

Comments
 (0)