diff --git a/parse_todesfaelle.py b/parse_todesfaelle.py index 529f6cd..f085435 100644 --- a/parse_todesfaelle.py +++ b/parse_todesfaelle.py @@ -4,11 +4,12 @@ ''' :author: Maximilian Golla :contact: maximilian.golla@rub.de -:version: 0.0.6, 2022-02-03 +:version: 0.0.7, 2022-02-03 :description: Parses and formats RKI Todesfaelle nach Sterbedatum :data: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/COVID-19_Todesfaelle.xlsx ''' +import copy # deep copy dicts import sys import math from collections import OrderedDict @@ -96,10 +97,17 @@ def query(data, base, compare, n): # Base must always be later than (<=) compare if b_year > c_year: - print("Invalid query") - return + sys.stderr.write("Invalid query: b_year {} > c_year {}\n".format(b_year, c_year)) + sys.exit(-1) if b_year == c_year and b_week > c_week: - print("Invalid query") + sys.stderr.write("Invalid query: b_week {} > c_week {}\n".format(b_week, c_week)) + sys.exit(-1) + return + + # We can not determine more than top / flop 16: + if n > len(STATES): + sys.stderr.write("Invalid query: n {} > no. of states {}\n".format(n, len(STATES))) + sys.exit(-1) return # There is no cool way to determine the number of weeks between 202033 and 202119 because of RKI @@ -173,31 +181,52 @@ def query(data, base, compare, n): print("Change (Percent):") print(difference_percent) - # Get Top N entries + + tmp_top = copy.deepcopy(difference_percent) + tmp_flop = copy.deepcopy(difference_percent) + + # Get Top and Flop N entries top = dict() + flop = dict() for i in range(0, n): - max_key = max(difference_percent, key=difference_percent.get) - top[max_key] = difference_percent[max_key] - del difference_percent[max_key] - + max_key = max(tmp_top, key=tmp_top.get) + min_key = min(tmp_flop, key=tmp_flop.get) + top[max_key] = tmp_top[max_key] + flop[min_key] = tmp_flop[min_key] + del tmp_top[max_key] + del tmp_flop[min_key] + print("\nTop {}:".format(n)) print(top) + print("\nFlop {}:".format(n)) + print(flop) - time_series = dict() + time_series_top = dict() + time_series_flop = dict() for i in range(0, end + 1): year = all_entries[i][0:4] week = all_entries[i][4:6] for state in top: if i >= start: - if year not in time_series: - time_series[year] = dict() - if week not in time_series[year]: - time_series[year][week] = dict() - if state not in time_series[year][week]: - time_series[year][week][state] = data[year][week][state] + if year not in time_series_top: + time_series_top[year] = dict() + if week not in time_series_top[year]: + time_series_top[year][week] = dict() + if state not in time_series_top[year][week]: + time_series_top[year][week][state] = data[year][week][state] + for state in flop: + if i >= start: + if year not in time_series_flop: + time_series_flop[year] = dict() + if week not in time_series_flop[year]: + time_series_flop[year][week] = dict() + if state not in time_series_flop[year][week]: + time_series_flop[year][week][state] = data[year][week][state] - print("\nTime Series:") - output(time_series, top) + print("\nTime Series Top:") + output(time_series_top, top) + print("\nTime Series Flop:") + output(time_series_flop, flop) def output(data, states): # Print the header @@ -221,7 +250,7 @@ def main(): output(data, STATES) base = "202130" - compare = "202131" + compare = "202139" query(data, base, compare, 5) if __name__ == '__main__':