Skip to content

Commit 70acadd

Browse files
authored
662 rework population data download (#684)
1 parent 9740ac0 commit 70acadd

27 files changed

Lines changed: 480 additions & 789 deletions

.github/workflows/epidata_main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ jobs:
116116
run: |
117117
mkdir -p data_dl
118118
getcasedata -o data_dl --no-progress-indicators
119-
getpopuldata -o data_dl --no-progress-indicators
119+
getpopuldata -o data_dl --no-progress-indicators --username=${{ secrets.REGIODBUSER }} --password=${{ secrets.REGIODBPW }}
120120
getjhdata -o data_dl --no-progress-indicators
121121
getdividata -o data_dl --no-progress-indicators
122122
getcommutermobility -o data_dl --no-progress-indicators

pycode/examples/plot/plotResultsMapGermany.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
population = gpd.get_population_data(
7676
read_data=False, file_format=file_format,
7777
out_folder='data/pydata/Germany/', no_raw=True,
78-
split_gender=False, merge_eisenach=True)
78+
merge_eisenach=True)
7979

8080
# For fitting of different age groups we need format ">X".
8181
age_group_values = list(age_groups.values())

pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import matplotlib.pyplot as plt
3030
import numpy as np
3131
import pandas as pd
32-
import requests
3332

3433
from memilio.epidata import defaultDict as dd
3534
from memilio.epidata import getCaseData as gcd
@@ -243,13 +242,10 @@ def compare_estimated_and_rki_deathsnumbers(
243242
df_jh_week.rename(
244243
columns={'deaths_daily': 'Deaths_weekly'}, inplace=True)
245244

246-
# download weekly deaths numbers from rki
247-
if not read_data:
248-
download_weekly_deaths_numbers(data_path)
245+
df_dict = download_weekly_deaths_numbers(
246+
sheet_names=['COVID_Todesfälle'], data_path=data_path)
249247

250-
df_real_deaths_per_week = pd.read_excel(
251-
data_path + "Cases_deaths_weekly.xlsx", sheet_name='COVID_Todesfälle',
252-
header=0, engine='openpyxl')
248+
df_real_deaths_per_week = df_dict['COVID_Todesfälle']
253249
df_real_deaths_per_week.rename(
254250
columns={'Sterbejahr': 'year', 'Sterbewoche': 'week',
255251
'Anzahl verstorbene COVID-19 Fälle': 'confirmed_deaths_weekly'},
@@ -321,15 +317,11 @@ def get_weekly_deaths_data_age_gender_resolved(data_path, read_data):
321317
@param read_data False or True. Defines if data is read from file or downloaded.
322318
"""
323319

324-
if not read_data:
325-
download_weekly_deaths_numbers(data_path)
326-
327-
df_real_deaths_per_week_age = pd.read_excel(
328-
data_path + 'Cases_deaths_weekly.xlsx',
329-
sheet_name='COVID_Todesfälle_KW_AG10', header=0, engine='openpyxl')
330-
df_real_deaths_per_week_gender = pd.read_excel(
331-
data_path + 'Cases_deaths_weekly.xlsx',
332-
sheet_name='COVID_Todesfälle_KW_AG20_G', header=0, engine='openpyxl')
320+
df_dict = download_weekly_deaths_numbers(sheet_names=[
321+
'COVID_Todesfälle_KW_AG10', 'COVID_Todesfälle_KW_AG20_G'], data_path=data_path)
322+
323+
df_real_deaths_per_week_age = df_dict['COVID_Todesfälle_KW_AG10']
324+
df_real_deaths_per_week_gender = df_dict['COVID_Todesfälle_KW_AG20_G']
333325
df_real_deaths_per_week_age.rename(
334326
columns={'Sterbejahr': 'year', 'Sterbewoche': 'week',
335327
'AG 0-9 Jahre': 'age 0-9 years',
@@ -381,21 +373,25 @@ def get_weekly_deaths_data_age_gender_resolved(data_path, read_data):
381373
'cases_weekly_deaths_gender_resolved', 'json')
382374

383375

384-
def download_weekly_deaths_numbers(data_path):
376+
def download_weekly_deaths_numbers(sheet_names, data_path):
385377
"""!Downloads excel file from RKI webpage
386-
378+
@param sheet_names List. Sheet names to be returned.
387379
@param data_path Path where to store the file.
380+
381+
@return dict of dataframes with sheetnames as keys.
388382
"""
389383

390-
name_file = "Cases_deaths_weekly.xlsx"
384+
name_file = "Cases_deaths_weekly"
391385
url = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/" \
392386
"COVID-19_Todesfaelle.xlsx?__blob=publicationFile"
393387

394-
# data_path: path where to safe Excel-file
395-
r = requests.get(url)
396-
filename = os.path.join(data_path, name_file)
397-
with open(filename, 'wb') as output_file:
398-
output_file.write(r.content)
388+
# Either download excel file from url or read json file from filepath.
389+
# Since sheet_names is a list of names, get_file returns a dict
390+
# with sheet_names as keys and their corresponding dataframes as values.
391+
df_dict = gd.get_file(filepath=data_path + name_file + '.json', url=url, read_data=False,
392+
param_dict={'sheet_name': sheet_names, 'header': 0, 'engine': 'openpyxl'})
393+
394+
return df_dict
399395

400396

401397
def main():

pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,16 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
197197
gd.write_dataframe(
198198
commuter_migration_files[state_id_file], mobility_dir, filename, 'json')
199199

200-
countykey_list = geoger.get_county_ids(merge_eisenach=False, zfill=True)
200+
countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)
201201
govkey_list = geoger.get_governing_regions()
202202

203203
# get population data for all countys (TODO: better to provide a corresponding method for the following lines in getPopulationData itself)
204204
# This is not very nice either to have the same file with either Eisenach merged or not...
205-
206-
population = gPd.get_population_data(
207-
out_folder=out_folder, merge_eisenach=False, read_data=read_data)
205+
if read_data:
206+
population = pd.read_json(directory+'county_current_population.json')
207+
else:
208+
population = gPd.get_population_data(
209+
out_folder=out_folder, merge_eisenach=True, read_data=read_data)
208210

209211
countypop_list = list(population[dd.EngEng["population"]])
210212

@@ -244,6 +246,12 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
244246
counties_migratedfrom.append(
245247
np.zeros(len(gov_county_table[gov_region])))
246248

249+
# merge eisenach and wartburgkreis
250+
commuter_migration_file.iloc[:, 2].replace(
251+
'16056', '16063', inplace=True)
252+
commuter_migration_file.iloc[:, 0].replace(
253+
'16056', '16063', inplace=True)
254+
247255
current_col = countykey2numlist[commuter_migration_file.iloc[i, 0]]
248256
curr_county_migratedto = commuter_migration_file.iloc[i, 1]
249257
current_key = commuter_migration_file.iloc[i, 0]
@@ -449,30 +457,33 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
449457

450458
# this is neither a very elegant nor a very general way to merge...
451459
# better options to be searched for!
452-
merge_id = 16063
453-
new_idx = countykey_list.index(geoger.CountyMerging[merge_id][0])
454-
old_idx = countykey_list.index(geoger.CountyMerging[merge_id][1])
455-
456-
mat_commuter_migration[new_idx, :] = mat_commuter_migration[new_idx,
457-
:] + mat_commuter_migration[old_idx, :]
458-
mat_commuter_migration[:, new_idx] = mat_commuter_migration[:,
459-
new_idx] + mat_commuter_migration[:, old_idx]
460-
mat_commuter_migration[new_idx, new_idx] = 0
461-
462-
mat_commuter_migration = np.delete(mat_commuter_migration, old_idx, axis=0)
463-
mat_commuter_migration = np.delete(mat_commuter_migration, old_idx, axis=1)
460+
if 16056 in countykey_list:
461+
merge_id = 16063
462+
new_idx = countykey_list.index(geoger.CountyMerging[merge_id][0])
463+
old_idx = countykey_list.index(geoger.CountyMerging[merge_id][1])
464+
465+
mat_commuter_migration[new_idx, :] = mat_commuter_migration[new_idx,
466+
:] + mat_commuter_migration[old_idx, :]
467+
mat_commuter_migration[:, new_idx] = mat_commuter_migration[:,
468+
new_idx] + mat_commuter_migration[:, old_idx]
469+
mat_commuter_migration[new_idx, new_idx] = 0
470+
471+
mat_commuter_migration = np.delete(
472+
mat_commuter_migration, old_idx, axis=0)
473+
mat_commuter_migration = np.delete(
474+
mat_commuter_migration, old_idx, axis=1)
464475

465476
countykey_list = geoger.get_county_ids()
466477
df_commuter_migration = pd.DataFrame(
467478
data=mat_commuter_migration, columns=countykey_list)
468479
df_commuter_migration.index = countykey_list
469480
commuter_sanity_checks(df_commuter_migration)
470-
filename = 'migration_bfa_' + \
471-
str(ref_year) + '_dim' + str(mat_commuter_migration.shape[0])
481+
filename = 'migration_bfa_' + str(ref_year)
472482
gd.write_dataframe(df_commuter_migration, directory, filename, file_format)
473-
gd.check_dir(os.path.join(directory.split('pydata')[0], 'mobility'))
483+
directory = directory.split('pydata')[0] + 'mobility/'
484+
gd.check_dir(directory)
474485
gd.write_dataframe(
475-
df_commuter_migration, directory.split('pydata')[0] + 'mobility/',
486+
df_commuter_migration, directory,
476487
'commuter_migration_scaled_' + str(ref_year),
477488
'txt', {'sep': ' ', 'index': False, 'header': False})
478489

@@ -491,7 +502,7 @@ def commuter_sanity_checks(df):
491502

492503
def get_neighbors_mobility(
493504
countyid, direction='both', abs_tol=0, rel_tol=0, tol_comb='or',
494-
merge_eisenach=True, out_folder=dd.defaultDict['out_folder'], ref_year=2022):
505+
out_folder=dd.defaultDict['out_folder'], ref_year=2022):
495506
'''! Returns the neighbors of a particular county ID depending on the
496507
commuter mobility and given absolute and relative thresholds on the number
497508
of commuters.
@@ -524,12 +535,8 @@ def get_neighbors_mobility(
524535
directory = os.path.join(out_folder, 'Germany/')
525536
gd.check_dir(directory)
526537
try:
527-
if merge_eisenach:
528-
commuter = gd.get_file(os.path.join(
529-
directory, "migration_bfa_"+str(ref_year)+"_dim400.json"), None, True)
530-
else:
531-
commuter = gd.get_file(os.path.join(
532-
directory, "migration_bfa_"+str(ref_year)+"_dim401.json"), None, True)
538+
commuter = gd.get_file(os.path.join(
539+
directory, "migration_bfa_"+str(ref_year)+"_dim400.json"), read_data=True)
533540
except FileNotFoundError:
534541
print("Commuter data was not found. Download and process it from the internet.")
535542
commuter = get_commuter_data(out_folder=out_folder, ref_year=ref_year)
@@ -556,7 +563,7 @@ def get_neighbors_mobility(
556563

557564
def get_neighbors_mobility_all(
558565
direction='both', abs_tol=0, rel_tol=0, tol_comb='or',
559-
merge_eisenach=True, out_folder=dd.defaultDict['out_folder'], ref_year=2022):
566+
out_folder=dd.defaultDict['out_folder'], ref_year=2022):
560567
'''! Returns the neighbors of all county IDs depending on the
561568
commuter mobility and given absolute and relative thresholds on the number
562569
of commuters.
@@ -580,15 +587,14 @@ def get_neighbors_mobility_all(
580587
'''
581588
directory = os.path.join(out_folder, 'Germany/')
582589
gd.check_dir(directory)
583-
countyids = geoger.get_county_ids(merge_eisenach=merge_eisenach)
590+
countyids = geoger.get_county_ids()
584591
neighbors_table = []
585-
# TODO:
592+
# TODO: performance has to be improved
586593
for id in countyids:
587594
neighbors_table.append(
588595
get_neighbors_mobility(
589596
id, direction=direction, abs_tol=abs_tol,
590597
rel_tol=rel_tol, tol_comb=tol_comb,
591-
merge_eisenach=merge_eisenach,
592598
out_folder=out_folder, ref_year=ref_year))
593599

594600
return dict(zip(countyids, neighbors_table))
@@ -612,7 +618,7 @@ def main():
612618
arg_dict_commuter = {**arg_dict, "setup_dict": setup_dict}
613619

614620
get_neighbors_mobility(
615-
1001, abs_tol=0, rel_tol=0, tol_comb='or', merge_eisenach=True,
621+
1001, abs_tol=0, rel_tol=0, tol_comb='or',
616622
out_folder=dd.defaultDict['out_folder'])
617623

618624
get_commuter_data(**arg_dict_commuter)

pycode/memilio-epidata/memilio/epidata/getDIVIData.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,10 @@ def divi_data_sanity_checks(df=pd.DataFrame()):
207207
raise gd.DataError("Error: Data categories have changed.")
208208
# check if size of dataframe is not unusual
209209
# data collection starts at 24.04.2020
210+
# TODO: Number of reporting counties get less with time.
211+
# Maybe we should look for a new method to sanitize the size of the DataFrame.
210212
num_dates = (date.today() - date(2020, 4, 24)).days
211-
min_num_data = 390*num_dates # not all 400 counties report every day
213+
min_num_data = 380*num_dates # not all 400 counties report every day
212214
max_num_data = 400*num_dates
213215
if (len(df) < min_num_data) or (len(df) > max_num_data):
214216
raise gd.DataError("Error: unexpected length of dataframe.")

pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,7 @@ def get_file(
173173

174174
if read_data:
175175
try:
176-
if filepath.endswith('xlsx'):
177-
df = pd.read_excel(filepath, **param_dict)
178-
else:
179-
df = pd.read_json(filepath)
176+
df = pd.read_json(filepath)
180177
except FileNotFoundError:
181178
if interactive and user_choice(
182179
"Warning: The file: " + filepath +
@@ -217,9 +214,12 @@ def get_file(
217214
if df.empty:
218215
raise DataError("Error: Dataframe is empty.")
219216
except AttributeError:
220-
for i in range(len(df)):
221-
if df[i].empty:
222-
raise DataError("Error: Dataframe is empty.")
217+
if isinstance(df, list) or isinstance(df, dict):
218+
for i in df:
219+
if df[i].empty:
220+
raise DataError("Error: Dataframe is empty.")
221+
else:
222+
raise DataError("Could not catch type of df: " + str(type(df)))
223223
return df
224224

225225

@@ -263,7 +263,7 @@ def cli(what):
263263
cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'make_plot'],
264264
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'make_plot', 'split_berlin', 'rep_date'],
265265
"cases_est": ['Download case data from RKI and JHU and estimate recovered and deaths', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'make_plot', 'split_berlin', 'rep_date'],
266-
"population": ['Download population data from official sources'],
266+
"population": ['Download population data from official sources', 'username'],
267267
"commuter_official": ['Download commuter data from official sources', 'make_plot'],
268268
"vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'make_plot', 'sanitize_data'],
269269
"testing": ['Download testing data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'make_plot'],
@@ -351,6 +351,14 @@ def cli(what):
351351
help='Disables all progress indicators (used for downloads etc.).',
352352
action='store_true')
353353

354+
if 'username' in what_list:
355+
parser.add_argument(
356+
'--username', type=str
357+
)
358+
359+
parser.add_argument(
360+
'--password', type=str
361+
)
354362
args = vars(parser.parse_args())
355363
# disable progress indicators globally, if the argument --no-progress-indicators was specified
356364
progress_indicator.ProgressIndicator.disable_indicators(

0 commit comments

Comments (0)