Initialer Commit
This commit is contained in:
parent
c19f7e1ba9
commit
8d7e2c8144
197
parse_todesfaelle.py
Normal file
197
parse_todesfaelle.py
Normal file
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
:author: Maximilian Golla
|
||||
:contact: maximilian.golla@rub.de
|
||||
:version: 0.0.5, 2022-02-01
|
||||
:description: Parses and formats RKI Todesfaelle nach Sterbedatum
|
||||
:data: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/COVID-19_Todesfaelle.xlsx
|
||||
'''
|
||||
|
||||
import sys
|
||||
import math
|
||||
from collections import OrderedDict
|
||||
|
||||
# Per-state divisor, keyed by the two-letter code of each German federal
# state. parse() divides every death count by the matching entry.
# NOTE(review): the figures look like inhabitants in thousands -- confirm
# against the source they were taken from.
# Insertion order matters: it defines the column order of the printed table.
STATES = {
    "BB": 2512,   # Brandenburg
    "BE": 3645,   # Berlin
    "BW": 11070,  # Baden-Württemberg
    "BY": 13077,  # Bayern
    "HB": 683,    # Bremen
    "HE": 6266,   # Hessen
    "HH": 1841,   # Hamburg
    "MV": 1610,   # Mecklenburg-Vorpommern
    "NI": 7982,   # Niedersachsen
    "NW": 17933,  # Nordrhein-Westfalen
    "RP": 4085,   # Rheinland-Pfalz
    "SH": 2897,   # Schleswig-Holstein
    "SL": 991,    # Saarland
    "SN": 4078,   # Sachsen
    "ST": 2208,   # Sachsen-Anhalt
    "TH": 2143,   # Thüringen
}
|
||||
|
||||
def read_file(filename):
    '''
    Read the data rows of a text file.

    :param filename: path of the file to read
    :returns: list of all lines after the first one, in file order, with
              trailing carriage-return / newline characters removed
    '''
    with open(filename, 'r') as handle:
        # The first line is the RKI header; consume it without keeping it.
        next(handle, None)
        return [row.rstrip('\r\n') for row in handle]
|
||||
|
||||
def parse(data):
    '''
    Build the year -> week -> state table of normalized death counts.

    :param data: iterable of comma-separated rows; the fields are read as
                 state code, week, year, death count (in that order). The
                 count may be censored, e.g. "<4".
    :returns: OrderedDict mapping year ("2020".."2022") to an OrderedDict
              mapping the two-digit week to a dict {state code: value},
              where value is ``count * 100 / STATES[state]`` rounded to two
              decimals. Weeks/states without a row keep the initial 0.
    :raises KeyError: if a row names a state that is not in STATES or a
                      year/week outside the pre-initialized table
    :raises SystemExit: with status -1 (after a message on stderr) if the
                        same year/week/state occurs in more than one row
    '''
    # (year, first reported week, last reported week) as published by the RKI.
    week_ranges = (
        ("2020", 10, 53),  # 2020 has 53 weeks
        ("2021", 1, 52),
        ("2022", 1, 1),    # only one week of 2022 so far
    )

    # Initialize the data structure with 0
    result = OrderedDict()
    for year, first_week, last_week in week_ranges:
        result[year] = OrderedDict()
        for number in range(first_week, last_week + 1):
            result[year]["{:02d}".format(number)] = dict.fromkeys(STATES, 0)

    # Parse the actual data
    seen = set()
    for line in data:
        fields = line.split(',')
        state = fields[0]
        week = str(fields[1]).zfill(2)  # table keys are zero-padded
        year = str(fields[2])
        raw_count = fields[3]

        # Special treatment of censored counts such as "<4": use the
        # largest value that is still below the bound (here 3).
        if '<' in raw_count:
            dead = int(raw_count.replace('<', '')) - 1
        else:
            dead = int(raw_count)

        # Normalize the data
        dead = round((dead * 100.0) / STATES[state], 2)

        # Detect possible data issues in RKI data. Duplicates are tracked
        # explicitly: comparing the stored value against 0 would miss a
        # repeated row whenever the first occurrence normalized to 0.
        cell = (year, week, state)
        if cell in seen:
            sys.stderr.write("Error in RKI data: Year {} Week {} State {}\n".format(year, week, state))
            sys.exit(-1)
        seen.add(cell)
        result[year][week][state] = dead
    return result
|
||||
|
||||
def _signed(value):
    '''Format a number with an explicit sign: "+-" for zero, "+" for positive.'''
    if value == 0:
        return "+-" + str(value)
    if value > 0:
        return "+" + str(value)
    return str(value)


def query(data, base, compare):
    '''
    Print how the cumulative per-state values changed between two weeks.

    For every state the values of all weeks from the first table entry up
    to the base week, and up to the compare week, are summed. The absolute
    and the relative change between those two sums is printed.

    :param data: nested mapping year -> week -> state -> value, as built
                 by parse()
    :param base: earlier week as "YYYYWW", e.g. "202130"
    :param compare: later (or same) week as "YYYYWW", e.g. "202131"
    :returns: None; all results are written to stdout. If base is later
              than compare, "Invalid query" is printed instead.
    :raises KeyError: if either week is not contained in data
    '''
    b_year, b_week = base[0:4], base[4:6]
    c_year, c_week = compare[0:4], compare[4:6]
    print("Base week: {}-{}: {}".format(b_year,b_week, data[b_year][b_week]))
    print("Compare week: {}-{}: {}\n".format(c_year,c_week, data[c_year][c_week]))

    # The base week must not be later than the compare week
    if (b_year, b_week) > (c_year, c_week):
        print("Invalid query")
        return

    # There is no cool way to determine the number of weeks between 202033
    # and 202119 because of RKI, so enumerate every week that is in the table.
    all_entries = [year + week for year in data for week in data[year]]

    # Positions of the base and compare week in that list. Both exist,
    # otherwise the lookups for the print statements above had failed.
    start = all_entries.index(b_year + b_week)
    end = all_entries.index(c_year + c_week)

    # Sum the dead from the beginning of the pandemic to the base week, and
    # from the beginning of the pandemic to the compare week
    base_value = dict.fromkeys(STATES, 0)
    differences = dict.fromkeys(STATES, 0)
    for position, entry in enumerate(all_entries[:end + 1]):
        week_values = data[entry[0:4]][entry[4:6]]
        for state in STATES:
            if position <= start:
                base_value[state] += week_values[state]
            differences[state] += week_values[state]

    print("Beginning to Base Week: ", base_value)
    print("Beginning to Compare Week:", differences, "\n")

    # Determine the change from the base week to the compare week in
    # absolute and percent
    difference_percent = dict()
    for state in STATES:
        diff = round(differences[state] - base_value[state], 2)
        if base_value[state] == 0:
            # Nothing was recorded up to the base week, so a relative
            # change is undefined; report that instead of dividing by zero.
            difference_percent[state] = "n/a"
        else:
            diff_percent = round((diff * 100.0) / base_value[state], 2)
            difference_percent[state] = _signed(diff_percent) + " %"
        differences[state] = _signed(diff)

    print("Change (Absolute):")
    print(differences)
    print("Change (Percent):")
    print(difference_percent)
|
||||
|
||||
|
||||
def output(data):
    '''
    Print the whole table to stdout as tab-separated values.

    The first row is the header ("Jahr", "Woche", then one column per state
    code in STATES order); every following row holds year, week and the
    value of each state for that week.

    :param data: nested mapping year -> week -> state -> value
    :returns: None
    '''
    state_codes = list(STATES)

    # Header row
    print("\t".join(["Jahr", "Woche"] + state_codes))

    # One row per (year, week)
    for year, weeks in data.items():
        for week, per_state in weeks.items():
            cells = [str(year), str(week)]
            cells.extend(str(per_state[code]) for code in state_codes)
            print("\t".join(cells))
|
||||
|
||||
def main():
    '''
    Script entry point: load the CSV export, build the table and print the
    comparison between calendar weeks 30 and 31 of 2021.
    '''
    rows = read_file('COVID-19_Todesfaelle.csv')
    table = parse(rows)
    # Alternative: dump the complete table instead of running a query.
    #output(table)
    query(table, "202130", "202131")
|
||||
|
||||
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
|
Loading…
Reference in a new issue