Skip to content

Commit 870a8b1

Browse files
authored
956 update Epidata Readme (#957)
1 parent a5cbb91 commit 870a8b1

5 files changed

Lines changed: 24 additions & 17 deletions

File tree

pycode/memilio-epidata/README.rst

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -136,16 +136,19 @@ If a new functionality shall be added please stick to the following instructions
136136
When you start creating a new script:
137137

138138
- have a look into getDataIntoPandasDataFrame.py; the main functionality which should be used is implemented there.
139-
- get_file is used to read in data
140-
- use the dictionaries in defaultDict.py to rename the existing columns of you data
141-
- add new column names to one of the existing languages; english, german and spanish translation exists at the moment.
142-
- for non-english languages always use the EngEng dictionary as the key, thus we can easily change names with just changing one line.
143-
- in defaultDict.py a dictionary with id and state and county name, respectivly exists. Please use it.
139+
- get_file is used to read in data.
140+
- the Conf class sets relevant download options.
141+
- use write_dataframe to write the pandas dataframe to file.
142+
- use check_dir if you want to create a new folder to write data to
143+
- use the dictionaries in defaultDict.py to rename the existing columns of your data
144+
- add new column names to one of the existing language dictionaries; English, German and Spanish translations exist at the moment.
145+
- for non-English languages always use the EngEng dictionary as the key; thus we can easily change names by changing just one line.
146+
- in defaultDict.py, dictionaries with id and name exist for states and counties, respectively. Please use them.
144147
- After renaming columns, you should not use pandas dataframe.column but instead use
145148
dataframe[column] where column is given by the dictionaries in defaultDict.py.
146149
Example: ID_County = dd.GerEng['IdLandkreis'] or dd.EngEng['idCounty'].
147-
- use check_dir of getDataIntoPandasDataFrame.py if you want to create a new folder to write data to
148-
- use write_dataframe of getDataIntoPandasDataFrame.py to write the pandas dataframe to file.
150+
- For extensive operations use the progress indicator to give feedback to the user.
151+
- ALWAYS use Copy-on-Write for pandas DataFrames.
149152
- use doxygen like comments in code as
150153
- add description in the beginning of the file
151154
- ## Header

pycode/memilio-epidata/memilio/epidata/README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ optional arguments working for some are:
134134
+---------------------------------------------+-----------------------------------------------------------+
135135
| --password | Password for regionalstatistik.de [population] |
136136
+---------------------------------------------+-----------------------------------------------------------+
137+
| --files | Files to write [case] |
138+
+---------------------------------------------+-----------------------------------------------------------+
137139

138140

139141
Hint:

pycode/memilio-epidata/memilio/epidata/getCaseData.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,12 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
136136
no_raw = conf.no_raw
137137
run_checks = conf.checks
138138

139-
if files == 'All':
139+
if (files == 'All') or (files == ['All']):
140140
files = ['infected', 'deaths', 'all_germany', 'infected_state',
141141
'all_state', 'infected_county', 'all_county', 'all_gender',
142142
'all_state_gender', 'all_county_gender', 'all_age',
143143
'all_state_age', 'all_county_age']
144-
if files == 'Plot':
144+
if (files == 'Plot') or (files == ['Plot']):
145145
# only consider plotable files
146146
files = ['infected', 'deaths', 'all_gender', 'all_age']
147147
# handle error of passing a string of one file instead of a list
@@ -287,14 +287,12 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
287287
'infected_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum"}, [IdBundesland],
288288
{dd.EngEng["idState"]: geoger.get_state_ids()}, ['Confirmed']],
289289
'all_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
290-
[IdBundesland], {dd.EngEng["idState"]
291-
: geoger.get_state_ids()},
290+
[IdBundesland], {dd.EngEng["idState"]: geoger.get_state_ids()},
292291
['Confirmed', 'Deaths', 'Recovered']],
293292
'infected_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum"}, [IdLandkreis],
294293
{dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()}, ['Confirmed']],
295294
'all_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
296-
[IdLandkreis], {dd.EngEng["idCounty"]
297-
: df[dd.EngEng["idCounty"]].unique()},
295+
[IdLandkreis], {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()},
298296
['Confirmed', 'Deaths', 'Recovered']],
299297
'all_gender': [[dateToUse, Geschlecht], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
300298
[Geschlecht], {dd.EngEng["gender"]: list(
@@ -313,8 +311,7 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
313311
), dd.EngEng["gender"]: list(df[dd.EngEng["gender"]].unique())},
314312
['Confirmed', 'Deaths', 'Recovered']],
315313
'all_age': [[dateToUse, Altersgruppe], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
316-
[Altersgruppe], {dd.EngEng["ageRKI"]
317-
: df[dd.EngEng["ageRKI"]].unique()},
314+
[Altersgruppe], {dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
318315
['Confirmed', 'Deaths', 'Recovered']],
319316
'all_state_age': [[dateToUse, IdBundesland, Altersgruppe],
320317
{AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ def cli(what):
357357
# "start_date": ['divi'] }
358358

359359
cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
360-
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date'],
360+
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
361361
"cases_est": ['Download case data from RKI and JHU and estimate recovered and deaths', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date'],
362362
"population": ['Download population data from official sources', 'username'],
363363
"commuter_official": ['Download commuter data from official sources'],
@@ -440,6 +440,10 @@ def cli(what):
440440
'-sd', '--sanitize-data', type=int, default=dd.defaultDict['sanitize_data'], dest='sanitize_data',
441441
help='Redistributes cases of every county either based on regions ratios or on thresholds and population'
442442
)
443+
if 'files' in what_list:
444+
parser.add_argument(
445+
'--files', nargs="*", default='All'
446+
)
443447

444448
# add optional download options
445449
if '--no-progress-indicators' in sys.argv:

pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ def test_call_functions(
465465
arg_dict_cases = {
466466
**arg_dict_all, **arg_dict_data_download,
467467
"rep_date": dd.defaultDict['rep_date'],
468-
"split_berlin": dd.defaultDict['split_berlin']}
468+
"split_berlin": dd.defaultDict['split_berlin'], 'files': 'All'}
469469

470470
arg_dict_divi = {
471471
**arg_dict_all, **arg_dict_data_download}
@@ -477,6 +477,7 @@ def test_call_functions(
477477
"sanitize_data": dd.defaultDict['sanitize_data']}
478478

479479
arg_dict_cases_est = {**arg_dict_cases}
480+
arg_dict_cases_est.pop('files')
480481

481482
arg_dict_jh = {**arg_dict_all, **arg_dict_data_download}
482483
# change start-date of jh to 2020-01-22

0 commit comments

Comments
 (0)