4242
4343
4444def read_population_data (username , password ):
45- ''' ! Reads Population data from regionalstatistik.de
45+ """ ! Reads Population data from regionalstatistik.de
4646
4747 Username and Password are required to sign in on regionalstatistik.de.
4848 A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format.
4949
5050 @param username Username to sign in at regionalstatistik.de.
5151 @param password Password to sign in at regionalstatistik.de.
5252 @return DataFrame
53- '''
53+ """
5454
5555 download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'
5656 req = requests .get (download_url , auth = (username , password ))
@@ -63,14 +63,14 @@ def read_population_data(username, password):
6363
6464
def path_to_credential_file():
    """! Returns path to .ini file where credentials are stored.

    The file 'CredentialsRegio.ini' is looked up in the directory that
    contains this module. The path can be changed if necessary.

    @return Absolute path to the credentials file 'CredentialsRegio.ini'.
    """
    # Resolve relative to this module so the result does not depend on the
    # current working directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, 'CredentialsRegio.ini')
7070
7171
7272def manage_credentials (interactive ):
73- ''' ! Manages credentials for regionalstatistik.de (needed for dowload).
73+ """ ! Manages credentials for regionalstatistik.de (needed for download).
7474
7575 A config file inside the epidata folder is either written (if not existent yet)
7676 with input from user or read with following format:
@@ -79,7 +79,7 @@ def manage_credentials(interactive):
7979 Password = XXXXX
8080
8181 @return Username and password to sign in at regionalstatistik.de.
82- '''
82+ """
8383 # path where ini file is found
8484 path = path_to_credential_file ()
8585
@@ -118,8 +118,8 @@ def manage_credentials(interactive):
118118 return username , password
119119
120120
121- def export_population_dataframe (df_pop , directory , file_format , merge_eisenach ):
122- ''' ! Writes population dataframe into directory with new column names and age groups
121+ def export_population_dataframe (df_pop : pd . DataFrame , directory : str , file_format : str , merge_eisenach : bool ):
122+ """ ! Writes population dataframe into directory with new column names and age groups
123123
124124 @param df_pop Population data DataFrame to be exported
125125 @param directory Directory where data is written to.
@@ -128,7 +128,7 @@ def export_population_dataframe(df_pop, directory, file_format, merge_eisenach):
128128 and 'Eisenach' are listed separately or
129129 combined as one entity 'Wartburgkreis'.
130130 @return exported DataFrame
131- '''
131+ """
132132
133133 new_cols = [
134134 dd .EngEng ['idCounty' ],
@@ -194,7 +194,7 @@ def export_population_dataframe(df_pop, directory, file_format, merge_eisenach):
194194
195195
196196def assign_population_data (df_pop_raw , counties , age_cols , idCounty_idx ):
197- ''' ! Assigns population data of all counties of old dataframe in new created dataframe
197+ """ ! Assigns population data of all counties of old dataframe in new created dataframe
198198
199199 In df_pop_raw there might be additional information like federal states,
200200 governing regions etc. which is not necessary for the dataframe.
@@ -205,7 +205,7 @@ def assign_population_data(df_pop_raw, counties, age_cols, idCounty_idx):
205205 @param age_cols Age groups in old DataFrame
206206 @param idCounty_idx indexes in old DataFrame where data of corresponding county starts
207207 @return new DataFrame
208- '''
208+ """
209209
210210 new_cols = {dd .EngEng ['idCounty' ]: counties [:, 1 ],
211211 dd .EngEng ['county' ]: counties [:, 0 ]}
@@ -283,45 +283,25 @@ def test_total_population(df_pop, age_cols):
283283 raise gd .DataError ('Total Population does not match expectation.' )
284284
285285
286- def get_population_data (read_data = dd .defaultDict ['read_data' ],
287- file_format = dd .defaultDict ['file_format' ],
288- out_folder = dd .defaultDict ['out_folder' ],
289- merge_eisenach = True ,
290- username = '' ,
291- password = '' ,
292- ** kwargs ):
293- """! Download age-stratified population data for the German counties.
294-
295- The data we use is:
296- Official 'Bevölkerungsfortschreibung' 12411-02-03-4:
297- 'Bevölkerung nach Geschlecht und Altersgruppen (17)'
298- of regionalstatistik.de.
299- ATTENTION: The raw file cannot be downloaded
300- automatically by our scripts without an Genesis Online account. In order to
301- work on this dataset, please enter your username and password or manually download it from:
302-
303- https://www.regionalstatistik.de/genesis/online -> "1: Gebiet, Bevölkerung,
304- Arbeitsmarkt, Wahlen" -> "12: Bevölkerung" -> "12411 Fortschreibung des
305- Bevölkerungsstandes" -> "12411-02-03-4: Bevölkerung nach Geschlecht und
306- Altersgruppen (17) - Stichtag 31.12. - regionale Tiefe: Kreise und
307- krfr. Städte".
308-
309- Download the xlsx or csv file and put it under dd.defaultDict['out_folder'],
310- this normally is Memilio/data/pydata/Germany.
311- The folders 'pydata/Germany' have to be created if they do not exist yet.
312- Then this script can be run.
286+ def fetch_population_data (read_data : bool = dd .defaultDict ['read_data' ],
287+ out_folder : str = dd .defaultDict ['out_folder' ],
288+ username = '' ,
289+ password = '' ,
290+ ** kwargs
291+ ) -> pd .DataFrame :
292+ """! Downloads or reads the population data.
293+ If it does not already exist, the folder Germany is generated in the given out_folder.
294+ If read_data == True and the file "FullData_population.json" exists, the data is read from this file
295+ and stored in a pandas dataframe. If read_data = True and the file does not exist the program is stopped.
296+ The downloaded dataframe is written to the file "FullData_population".
313297
314298 @param read_data False or True. Defines if data is read from file or
315299 downloaded. Default defined in defaultDict.
316- @param file_format File format which is used for writing the data.
317- Default defined in defaultDict.
318300 @param out_folder Path to folder where data is written in folder
319301 out_folder/Germany. Default defined in defaultDict.
320- @param merge_eisenach [Default: True] or False. Defines whether the
321- counties 'Wartburgkreis' and 'Eisenach' are listed separately or
322- combined as one entity 'Wartburgkreis'.
323- @param username Username to sign in at regionalstatistik.de.
302+ @param username Username to sign in at regionalstatistik.de.
324303 @param password Password to sign in at regionalstatistik.de.
304+
325305 @return DataFrame with adjusted population data for all ages to current level.
326306 """
327307 conf = gd .Conf (out_folder , ** kwargs )
@@ -341,6 +321,22 @@ def get_population_data(read_data=dd.defaultDict['read_data'],
341321
342322 df_pop_raw = read_population_data (username , password )
343323
324+ return df_pop_raw
325+
326+
327+ def preprocess_population_data (df_pop_raw : pd .DataFrame ,
328+ merge_eisenach : bool = True ,
329+ ) -> pd .DataFrame :
330+ """! Processing of the downloaded data
331+ * the columns are renamed to English and the state and county names are added.
332+
333+ @param df_pop_raw pd.DataFrame. A Dataframe containing input population data
334+ @param merge_eisenach [Default: True] or False. Defines whether the
335+ counties 'Wartburgkreis' and 'Eisenach' are listed separately or
336+ combined as one entity 'Wartburgkreis'.
337+
338+ @return df pd.DataFrame. Processed population data
339+ """
344340 column_names = list (df_pop_raw .columns )
345341 # rename columns
346342 rename_columns = {
@@ -381,12 +377,96 @@ def get_population_data(read_data=dd.defaultDict['read_data'],
381377
382378 df_pop = assign_population_data (
383379 df_pop_raw , counties , age_cols , idCounty_idx )
384-
385380 test_total_population (df_pop , age_cols )
381+ return df_pop
386382
383+
def write_population_data(df_pop: pd.DataFrame,
                          out_folder: str = dd.defaultDict['out_folder'],
                          file_format: str = dd.defaultDict['file_format'],
                          merge_eisenach: bool = True
                          ) -> pd.DataFrame:
    """! Write the processed population data to out_folder/Germany.

    The actual export is delegated to export_population_dataframe, which
    writes the population dataframe into the target directory with new
    column names and age groups.

    @param df_pop pd.DataFrame. A Dataframe containing processed population data
    @param out_folder str. Folder where data is written to (in subfolder
        'Germany'). Default defined in defaultDict.
    @param file_format str. File format which is used for writing the data.
        Default defined in defaultDict.
    @param merge_eisenach [Default: True] or False. Defines whether the
        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
        combined as one entity 'Wartburgkreis'.

    @return pd.DataFrame. The exported DataFrame as returned by
        export_population_dataframe.
    """
    # All population files live in the 'Germany' subfolder of out_folder.
    directory = os.path.join(out_folder, 'Germany')
    df_pop_export = export_population_dataframe(
        df_pop, directory, file_format, merge_eisenach)
    return df_pop_export
408+
409+
def get_population_data(read_data: bool = dd.defaultDict['read_data'],
                        file_format: str = dd.defaultDict['file_format'],
                        out_folder: str = dd.defaultDict['out_folder'],
                        merge_eisenach: bool = True,
                        username='',
                        password='',
                        **kwargs
                        ):
    """! Download age-stratified population data for the German counties.

    The data we use is:
    Official 'Bevölkerungsfortschreibung' 12411-02-03-4:
    'Bevölkerung nach Geschlecht und Altersgruppen (17)'
    of regionalstatistik.de.
    ATTENTION: The raw file cannot be downloaded
    automatically by our scripts without a Genesis Online account. In order to
    work on this dataset, please enter your username and password or manually download it from:

    https://www.regionalstatistik.de/genesis/online -> "1: Gebiet, Bevölkerung,
    Arbeitsmarkt, Wahlen" -> "12: Bevölkerung" -> "12411 Fortschreibung des
    Bevölkerungsstandes" -> "12411-02-03-4: Bevölkerung nach Geschlecht und
    Altersgruppen (17) - Stichtag 31.12. - regionale Tiefe: Kreise und
    krfr. Städte".

    Download the xlsx or csv file and put it under dd.defaultDict['out_folder'],
    this normally is Memilio/data/pydata/Germany.
    The folders 'pydata/Germany' have to be created if they do not exist yet.
    Then this script can be run.

    The work is done in three steps: fetch_population_data (download or
    read), preprocess_population_data and write_population_data.

    @param read_data False or True. Defines if data is read from file or
        downloaded. Default defined in defaultDict.
    @param file_format File format which is used for writing the data.
        Default defined in defaultDict.
    @param out_folder Path to folder where data is written in folder
        out_folder/Germany. Default defined in defaultDict.
    @param merge_eisenach [Default: True] or False. Defines whether the
        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
        combined as one entity 'Wartburgkreis'.
    @param username str. Username to sign in at regionalstatistik.de.
    @param password str. Password to sign in at regionalstatistik.de.
    @param kwargs Additional keyword arguments forwarded to
        fetch_population_data.
    @return DataFrame with adjusted population data for all ages to current level.
    """
    # file_format is not a named parameter of fetch_population_data; it is
    # forwarded on purpose and ends up in that function's **kwargs.
    raw_df = fetch_population_data(
        read_data=read_data,
        out_folder=out_folder,
        file_format=file_format,
        username=username,
        password=password,
        **kwargs
    )
    preprocess_df = preprocess_population_data(
        df_pop_raw=raw_df,
        merge_eisenach=merge_eisenach
    )
    # Pass the caller's merge_eisenach through; it was previously hard-coded
    # to True here, so merge_eisenach=False was ignored during export.
    df_pop_export = write_population_data(
        df_pop=preprocess_df,
        file_format=file_format,
        out_folder=out_folder,
        merge_eisenach=merge_eisenach
    )
    return df_pop_export
391471
392472
0 commit comments