Added Flop N entries (and time series)

This commit is contained in:
Maximilian Golla 2022-02-03 23:44:09 +01:00
parent f5f2a05f7e
commit 4fe0302dd8

View file

@ -4,11 +4,12 @@
'''
:author: Maximilian Golla
:contact: maximilian.golla@rub.de
:version: 0.0.6, 2022-02-03
:version: 0.0.7, 2022-02-03
:description: Parses and formats RKI Todesfaelle nach Sterbedatum
:data: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/COVID-19_Todesfaelle.xlsx
'''
import copy # deep copy dicts
import sys
import math
from collections import OrderedDict
@ -96,10 +97,17 @@ def query(data, base, compare, n):
# Base must always be earlier than or equal to (<=) compare
if b_year > c_year:
print("Invalid query")
return
sys.stderr.write("Invalid query: b_year {} > c_year {}\n".format(b_year, c_year))
sys.exit(-1)
if b_year == c_year and b_week > c_week:
print("Invalid query")
sys.stderr.write("Invalid query: b_week {} > c_week {}\n".format(b_week, c_week))
sys.exit(-1)
return
# We cannot determine a top / flop list longer than the number of states (16):
if n > len(STATES):
sys.stderr.write("Invalid query: n {} > no. of states {}\n".format(n, len(STATES)))
sys.exit(-1)
return
# There is no cool way to determine the number of weeks between 202033 and 202119 because of RKI
@ -173,31 +181,52 @@ def query(data, base, compare, n):
print("Change (Percent):")
print(difference_percent)
# Get Top N entries
tmp_top = copy.deepcopy(difference_percent)
tmp_flop = copy.deepcopy(difference_percent)
# Get Top and Flop N entries
top = dict()
flop = dict()
for i in range(0, n):
max_key = max(difference_percent, key=difference_percent.get)
top[max_key] = difference_percent[max_key]
del difference_percent[max_key]
max_key = max(tmp_top, key=tmp_top.get)
min_key = min(tmp_flop, key=tmp_flop.get)
top[max_key] = tmp_top[max_key]
flop[min_key] = tmp_flop[min_key]
del tmp_top[max_key]
del tmp_flop[min_key]
print("\nTop {}:".format(n))
print(top)
print("\nFlop {}:".format(n))
print(flop)
time_series = dict()
time_series_top = dict()
time_series_flop = dict()
for i in range(0, end + 1):
year = all_entries[i][0:4]
week = all_entries[i][4:6]
for state in top:
if i >= start:
if year not in time_series:
time_series[year] = dict()
if week not in time_series[year]:
time_series[year][week] = dict()
if state not in time_series[year][week]:
time_series[year][week][state] = data[year][week][state]
if year not in time_series_top:
time_series_top[year] = dict()
if week not in time_series_top[year]:
time_series_top[year][week] = dict()
if state not in time_series_top[year][week]:
time_series_top[year][week][state] = data[year][week][state]
for state in flop:
if i >= start:
if year not in time_series_flop:
time_series_flop[year] = dict()
if week not in time_series_flop[year]:
time_series_flop[year][week] = dict()
if state not in time_series_flop[year][week]:
time_series_flop[year][week][state] = data[year][week][state]
print("\nTime Series:")
output(time_series, top)
print("\nTime Series Top:")
output(time_series_top, top)
print("\nTime Series Flop:")
output(time_series_flop, flop)
def output(data, states):
# Print the header
@ -221,7 +250,7 @@ def main():
output(data, STATES)
base = "202130"
compare = "202131"
compare = "202139"
query(data, base, compare, 5)
if __name__ == '__main__':