Added Flop N entries (and time series)

2022-02-03 23:44:09 +01:00 · 2022-02-03 23:44:09 +01:00 · 4fe0302dd8
parent f5f2a05f7e
commit 4fe0302dd8
1 changed file with 48 additions and 19 deletions
--- a/parse_todesfaelle.py
+++ b/parse_todesfaelle.py
@@ -4,11 +4,12 @@
 '''
 :author: Maximilian Golla
 :contact: maximilian.golla@rub.de
-:version: 0.0.6, 2022-02-03
+:version: 0.0.7, 2022-02-03
 :description: Parses and formats RKI Todesfaelle nach Sterbedatum
 :data: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/COVID-19_Todesfaelle.xlsx
 '''

+import copy # deep copy dicts
 import sys
 import math
 from collections import OrderedDict
@@ -96,10 +97,17 @@ def query(data, base, compare, n):

    # Base must always be later than (<=) compare
    if b_year > c_year:
-        print("Invalid query")
-        return
+        sys.stderr.write("Invalid query: b_year {} > c_year {}\n".format(b_year, c_year))
+        sys.exit(-1)
    if b_year == c_year and b_week > c_week:
-        print("Invalid query")
+        sys.stderr.write("Invalid query: b_week {} > c_week {}\n".format(b_week, c_week))
+        sys.exit(-1)
+        return
+
+    # We can not determine more than top / flop 16:
+    if n > len(STATES):
+        sys.stderr.write("Invalid query: n {} > no. of states {}\n".format(n, len(STATES)))
+        sys.exit(-1)
        return

    # There is no cool way to determine the number of weeks between 202033 and 202119 because of RKI 
@@ -173,31 +181,52 @@ def query(data, base, compare, n):
    print("Change (Percent):")
    print(difference_percent)

-    # Get Top N entries
+
+    tmp_top = copy.deepcopy(difference_percent)
+    tmp_flop = copy.deepcopy(difference_percent)
+
+    # Get Top and Flop N entries
    top = dict()
+    flop = dict()
    for i in range(0, n):
-        max_key = max(difference_percent, key=difference_percent.get)
-        top[max_key] = difference_percent[max_key]
-        del difference_percent[max_key]
-    
+        max_key = max(tmp_top, key=tmp_top.get)
+        min_key = min(tmp_flop, key=tmp_flop.get)
+        top[max_key] = tmp_top[max_key]
+        flop[min_key] = tmp_flop[min_key]
+        del tmp_top[max_key]
+        del tmp_flop[min_key]
+
    print("\nTop {}:".format(n))
    print(top)
+    print("\nFlop {}:".format(n))
+    print(flop)

-    time_series = dict()
+    time_series_top = dict()
+    time_series_flop = dict()
    for i in range(0, end + 1):
        year = all_entries[i][0:4]
        week = all_entries[i][4:6]
        for state in top:
            if i >= start:
-                if year not in time_series:
-                    time_series[year] = dict()
-                if week not in time_series[year]:
-                    time_series[year][week] = dict()
-                if state not in time_series[year][week]:
-                    time_series[year][week][state] = data[year][week][state]
+                if year not in time_series_top:
+                    time_series_top[year] = dict()
+                if week not in time_series_top[year]:
+                    time_series_top[year][week] = dict()
+                if state not in time_series_top[year][week]:
+                    time_series_top[year][week][state] = data[year][week][state]
+        for state in flop:
+            if i >= start:
+                if year not in time_series_flop:
+                    time_series_flop[year] = dict()
+                if week not in time_series_flop[year]:
+                    time_series_flop[year][week] = dict()
+                if state not in time_series_flop[year][week]:
+                    time_series_flop[year][week][state] = data[year][week][state]

-    print("\nTime Series:")
-    output(time_series, top)
+    print("\nTime Series Top:")
+    output(time_series_top, top)
+    print("\nTime Series Flop:")
+    output(time_series_flop, flop)

 def output(data, states):
    # Print the header
@@ -221,7 +250,7 @@ def main():
    output(data, STATES)

    base = "202130"
-    compare = "202131"
+    compare = "202139"
    query(data, base, compare, 5)

 if __name__ == '__main__':