Source code for reegis.energy_balance

# -*- coding: utf-8 -*-

"""Prepare parts of the energy balance of Germany and its federal states.

SPDX-FileCopyrightText: 2016-2021 Uwe Krien <krien@uni-bremen.de>

SPDX-License-Identifier: MIT
"""
__copyright__ = "Uwe Krien <krien@uni-bremen.de>"
__license__ = "MIT"


# Python libraries
import os
import logging

# Internal modules
from reegis import config as cfg
from reegis import inhabitants
from reegis import geometries

# External packages
import pandas as pd
import requests


[docs]def get_eb_index_translation_dict(): dic = cfg.get_dict("EB_INDEX_TRANSLATION") dic_keys = list(dic.keys()) for key in dic_keys: for keyword in ["Umw-Einsatz", "Umw-Ausstoß", "Umw-Verbrauch"]: if keyword in key: value = dic.pop(key) key = key.replace(keyword, keyword + ":") dic[key] = value if dic[key] == "": value = key value = value.replace("Umw-Einsatz", "transformation input") value = value.replace("Umw-Ausstoß", "transformation output") value = value.replace("Umw-Verbrauch", "transformation demand") dic[key] = value return dic
[docs]def get_de_balance(year): """Download and return energy balance of germany for a given year.""" url = cfg.get("energy_balance", "url_energy_balance_germany") req = requests.get(url.format(year=str(year)[-2:], suffix="xls")) if int(req.headers["Content-length"]) > 0: fn_de = os.path.join( cfg.get("paths", "energy_balance"), cfg.get("energy_balance", "energy_balance_de_original"), ).format(year=year, suffix="xls") with open(fn_de, "wb") as fout: fout.write(req.content) else: req = requests.get(url.format(year=str(year)[-2:], suffix="xlsx")) if int(req.headers["Content-length"]) > 0: fn_de = os.path.join( cfg.get("paths", "energy_balance"), cfg.get("energy_balance", "energy_balance_de_original"), ).format(year=year, suffix="xlsx") with open(fn_de, "wb") as fout: fout.write(req.content) else: raise ValueError("No file received. Check url.") fn_h = os.path.join( cfg.get("paths", "static_sources"), "energy_balance_header_germany.csv" ) head = pd.read_csv(fn_h, header=[0]).columns df = pd.read_excel(fn_de, "tj", index_col=[0], skiprows=6, usecols="A:AI", nrows=68) df.columns = head[1:] return df
[docs]def get_de_usage_balance(year, grouped=False): """ Parameters ---------- year grouped Returns ------- Examples -------- >>> df=get_de_usage_balance(2015, True) >>> df.loc['total', 'total'] 8898093 """ df = get_de_balance(year) df["Braunkohle (sonstige)"] += df["Hartbraunkohle"] df.drop( ["Hartbraunkohle", "primär (gesamt)", "sekundär (gesamt)", "Row"], axis=1, inplace=True, ) df = df.rename(columns=cfg.get_dict("COLUMN_TRANSLATION")) df = df.rename(cfg.get_dict("SECTOR")) df = df.loc[set(cfg.get_dict("SECTOR_OLD").values())] if grouped: df = df.groupby(by=cfg.get_dict("FUEL_GROUPS"), axis=1).sum() return df
[docs]def get_domestic_retail_share(year, grouped=False): """ Parameters ---------- year grouped Returns ------- Examples -------- >>> df=get_domestic_retail_share(2014, True) >>> df.loc['district heating', 'domestic'] 0.73 """ deb = get_de_usage_balance(year=year, grouped=grouped) deb.sort_index(1, inplace=True) # deb=deb.groupby(level=[1]).sum() share = pd.DataFrame() share["domestic"] = ( deb.loc["domestic"] / deb.loc["domestic and retail"] ).round(2) share["retail"] = ( (deb.loc["retail"] / deb.loc["domestic and retail"]) .round(2) .transpose() ) return share
[docs]def get_states_energy_balance(year=None): """ Get the energy balance for a given year. The input file is the csv-file downloaded from: https://www.lak-energiebilanzen.de/eingabe-dynamisch/?a=e900 Parameters ---------- year : int or None If year is None all possible years will be returned. Returns ------- pandas.DataFrame Notes ----- Translation of the index is incomplete. Examples -------- >>> eb=get_states_energy_balance(2012) >>> eb.loc[(['BB', 'NW'], 'extraction'), 'lignite (raw)'].round(1) BB extraction 316931.2 NW extraction 927025.0 Name: lignite (raw), dtype: float64 >>> eb=get_states_energy_balance() >>> eb.loc[([2012, 2013], ['BB', 'NW'], 'extraction'), 'lignite (raw)' ... ].round(1).sort_index() 2012 BB extraction 316931.2 NW extraction 927025.0 2013 BB extraction 318703.2 NW extraction 894546.0 Name: lignite (raw), dtype: float64 """ header_fn = os.path.join( cfg.get("paths", "static_sources"), cfg.get("energy_balance", "energy_balance_header"), ) header = pd.read_csv(header_fn) if os.path.sep not in cfg.get("energy_balance", "energy_balance_states"): fn = os.path.join( cfg.get("paths", "static_sources"), cfg.get("energy_balance", "energy_balance_states"), ) else: fn = cfg.get("energy_balance", "energy_balance_states") eb = pd.read_csv( fn, sep=";", skiprows=4, index_col=[0, 1, 2], skipfooter=10, engine="python", encoding="utf-8", ) eb.columns = header.columns codes = { "Baden-Württemberg": "BW", "Bayern": "BY", "Berlin": "BE", "Brandenburg": "BB", "Bremen": "HB", "Hamburg": "HH", "Hessen": "HE", "Mecklenburg-Vorpommern": "MV", "Niedersachsen": "NI", "Nordrhein-Westfalen": "NW", "Rheinland-Pfalz": "RP", "Saarland": "SL", "Sachsen": "SN", "Sachsen-Anhalt": "ST", "Schleswig-Holstein": "SH", "Thüringen": "TH", } fs_list = [codes[x] for x in eb.index.get_level_values(0).unique()] eb.index.set_levels(fs_list, level=0, inplace=True) eb = eb.fillna(0) eb.drop(["Anmerkung", "Stand"], axis=1, inplace=True) eb = eb.swaplevel(0, 1) eb.index = eb.index.rename(["", "", ""]) eb = eb.rename(columns=cfg.get_dict("COLUMN_TRANSLATION")) eb = eb.rename(index=get_eb_index_translation_dict(), level=2) if year is not None: eb = eb.loc[year] return eb
[docs]def get_usage_balance(year, grouped=False): """ Get the usage part of the energy balance. Parameters ---------- year : int Year of the energy balance. grouped : bool If set to True the fuels will be grouped to main groups like hard coal or lignite. Returns ------- pandas.DataFrame Examples -------- >>> year=2013 >>> cb=get_usage_balance(year) >>> total=cb.pop('total') >>> int((cb.loc['BE'].sum(axis=1) - total.loc['BE']).sum()) 0 >>> int((cb.loc['ST'].sum(axis=1) - total.loc['ST']).sum()) -8952 >>> int((cb.loc['BY'].sum(axis=1) - total.loc['BY']).sum()) -17731 >>> cb=get_usage_balance(year) >>> cb=fix_usage_balance(cb, year) >>> total=cb.pop('total') >>> int((cb.loc['BE'].sum(axis=1) - total.loc['BE']).sum()) 0 >>> int((cb.loc['ST'].sum(axis=1) - total.loc['ST']).sum()) 0 >>> int((cb.loc['BY'].sum(axis=1) - total.loc['BY']).sum()) 0 """ eb = get_states_energy_balance(year) eb = eb.loc[ (slice(None), list(cfg.get_dict("SECTOR").keys())), slice(None) ] eb = eb.rename(index=cfg.get_dict("SECTOR"), level=1) if grouped: eb = eb.groupby(by=cfg.get_dict("FUEL_GROUPS"), axis=1).sum() return eb
[docs]def fix_usage_balance(eb, year): """ Fixes the energy balances after analysing them. This is done manually. """ if year not in [2012, 2013, 2014]: raise ValueError( "You cannot edit the balance for year {0}.".format(year) ) # ****************************************************************** # Bavaria (Bayern) - Missing coal values # Difference between fuel sum and LAK table missing = {2012: 10529, 2013: 8995, 2014: 9398} fix = missing[year] # the missing value is added to 'hard coal raw' even though it is not # specified which hard coal product is missing. eb.loc[("BY", "total"), "hard coal (raw)"] = fix # There is a small amount specified in the 'domestic and retail' # sector. dom_retail = eb.loc[("BY", "domestic and retail"), "hard coal (raw)"] # The rest of the total hard coal consumption comes from the industrial # sector. eb.loc[("BY", "industrial"), "hard coal (raw)"] = fix - dom_retail # ****************************************************************** # Berlin (Berlin) - corrected values for domestic gas and electricity # In new publications (e.g. LAK table) these values have changed. The newer # values will be used. if year == 2013 or year == 2012: electricity = {2012: 9150, 2013: 7095} gas = {2012: -27883, 2013: -13317} total = {2012: -18733, 2013: -6222} for row in ["total", "domestic and retail", "retail"]: eb.loc[("BE", row), "electricity"] += electricity[year] eb.loc[("BE", row), "natural gas"] += gas[year] eb.loc[("BE", row), "total"] += total[year] # ****************************************************************** # Saxony-Anhalt (Sachsen Anhalt) - missing values for hard coal, oil and # other depending on the year. Due to a lack of information the # difference # will be halved between the sectors. missing = {2012: 5233, 2013: 4396, 2014: 3048} if year == 2012: fix = missing[year] # the missing value is added to 'hard coal raw' even though it is not # specified which hard coal product is missing. eb.loc[("ST", "industrial"), "other"] += fix / 2 eb.loc[("ST", "industrial"), "hard coal (raw)"] += fix / 2 # There is a small amount specified in the 'domestic and retail' # sector. dom_retail_hc = eb.loc[ ("ST", "domestic and retail"), "hard coal (raw)" ] # The rest of the total hard coal consumption comes from the industrial # sector. eb.loc[("ST", "total"), "other"] += fix / 2 eb.loc[("ST", "total"), "hard coal (raw)"] += fix / 2 + dom_retail_hc if year == 2013: fix = missing[year] # the missing value is added to 'hard coal raw' even though it is not # specified which hard coal product is missing. eb.loc[("ST", "industrial"), "mineral oil products"] += fix / 2 eb.loc[("ST", "industrial"), "hard coal (raw)"] += fix / 2 # There is a small amount specified in the 'domestic and retail' # sector. dom_retail_hc = eb.loc[ ("ST", "domestic and retail"), "hard coal (raw)" ] dom_retail_oil = eb.loc[ ("ST", "domestic and retail"), "mineral oil products" ] # The rest of the total hard coal consumption comes from the industrial # sector. eb.loc[("ST", "total"), "mineral oil products"] += fix / 2 + ( dom_retail_oil ) eb.loc[("ST", "total"), "hard coal (raw)"] += fix / 2 + dom_retail_hc if year == 2014: fix = missing[year] # the missing value is added to 'hard coal raw' even though it is not # specified which hard coal product is missing. eb.loc[("ST", "industrial"), "mineral oil products"] += fix / 2 eb.loc[("ST", "industrial"), "hard coal (coke)"] += fix / 2 # There is a small amount specified in the 'domestic and retail' # sector. dom_retail = eb.loc[ ("ST", "domestic and retail"), "mineral oil products" ] # The rest of the total hard coal consumption comes from the industrial # sector. eb.loc[("ST", "total"), "mineral oil products"] += fix / 2 + dom_retail eb.loc[("ST", "total"), "hard coal (coke)"] += fix / 2 return eb
[docs]def get_transformation_balance(year): """ Reshape the energy balance and return the transformation part as a MultiIndex DataFrame. Parameters ---------- year : int Returns ------- pandas.DataFrame Examples -------- >>> year=2014 >>> ub=get_transformation_balance(year) >>> int(ub.loc[('BB', 'input', 'Heizwerke'), 'total']) 0 >>> ub=fix_transformation_balance(ub) >>> int(ub.loc[('BB', 'input', 'Heizwerke'), 'total']) 5347 """ eb = get_states_energy_balance(year) eb = eb.groupby(by=cfg.get_dict("FUEL_GROUPS"), axis=1).sum() my_index = pd.MultiIndex(levels=[[], [], []], codes=[[], [], []]) cb = pd.DataFrame(index=my_index, columns=eb.columns) for i in eb.iterrows(): if "transformation input:" in i[0][1]: cb.loc[ i[0][0], "input", i[0][1].replace("transformation input: ", "") ] = i[1] elif "transformation output:" in i[0][1]: cb.loc[ i[0][0], "output", i[0][1].replace("transformation output: ", ""), ] = i[1] elif "Primär" in i[0][1]: cb.loc[i[0][0], "primary", i[0][1]] = i[1] elif "Energieangebot" in i[0][1]: cb.loc[i[0][0], "tender", i[0][1]] = i[1] elif "Endenergieverbrauch" in i[0][1]: cb.loc[i[0][0], "usage", i[0][1]] = i[1] cb.sort_index(inplace=True) return cb
[docs]def check_transformation_balance(years=None, balance=None, path=None): """ Checks the balance of the transformation balance. If the difference is greater than 5 the name of the region and the difference will be printed. If a path is given wrong balances will be stored as an excel sheet in this path. One excel table for each year will be created. Parameters ---------- years : list List of years to check. balance : pandas.DataFrame (optional) A valid transformation balance to check. path : str A directory where the regions Examples -------- >>> check_transformation_balance([2014]) 2014 - BB: 460589 2014 - BW: 706972 2014 - BY: 2288252 2014 - MV: 19242 2014 - NI: 705997 2014 - SH: 377561 2014 - ST: 51495 >>> ub=get_transformation_balance(2014) >>> ub=check_transformation_balance(balance=ub) nn - BB: 460589 nn - BW: 706972 nn - BY: 2288252 nn - MV: 19242 nn - NI: 705997 nn - SH: 377561 nn - ST: 51495 """ if balance is not None: years = ["nn"] cb = balance cb_orig = cb.copy() else: cb = None cb_orig = None writer = None fn = None for year in years: if balance is None: cb = get_transformation_balance(int(year)) cb_orig = cb.copy() if path is not None: fn = os.path.join(path, "check_{0}.xls".format(year)) writer = pd.ExcelWriter(fn) total = cb.pop("total") for region in cb.index.get_level_values(0).unique(): value = (cb.loc[region].sum(axis=1) - total.loc[region]).sum() if abs(value) > 5: if path is not None: cb_orig.loc[region].to_excel(writer, region) else: print( "{0} - {1}: {2}".format(year, region, int(abs(value))) ) if path is not None: writer.save() logging.info("File saved to {0}".format(fn)) if balance is not None: return cb_orig else: return None
[docs]def fix_transformation_balance(eb): """ This is a fix after a manual analysis of the energy balances. Use with care and check the results., """ # *********** BB ******************************************************** # BB: Total input calculated by total output # BB: Gas input calculated by total input - sum input without gas. eb.loc["BB", "input", "Heizwerke"]["total"] = ( eb.loc["BB", "output", "Heizwerke"]["total"] * 0.9 ) total = eb.loc["BB", "input", "Heizwerke"]["total"] eb.loc["BB", "input", "Heizwerke"]["gas"] = ( total - eb.loc["BB", "input", "Heizwerke"].sum() + total ) # *********** BY ******************************************************** # BY: The missing fuel for CHP is assumed to be hard coal. kwk_row = "Heizkraftwerke der allgemeinen Versorgung (nur KWK)" total = eb.loc["BY", "input", kwk_row]["total"] eb.loc["BY", "input", kwk_row]["hard coal"] = ( total - eb.loc["BY", "input", kwk_row].sum() + total ) # BY: The missing fuel for heat plants is assumed to be natural gas. total = eb.loc["BY", "input", "Heizwerke"]["total"] eb.loc["BY", "input", "Heizwerke"]["gas"] = ( total - eb.loc["BY", "input", "Heizwerke"].sum() + total ) return eb
[docs]def get_transformation_balance_by_region( regions, year, name="region", fix=False ): """ Get the transformation part of the energy balance for a given region set. The values will be recalculated by the number of inhabitants. Parameters ---------- year : int regions : GeoDataFrame name : str fix : bool Returns ------- pandas.DataFrame Examples -------- >>> cb_orig=get_transformation_balance(2014) >>> regions=geometries.load( ... cfg.get('paths', 'geometry'), ... cfg.get('geometry', 'de21_polygons')) >>> cb=get_transformation_balance_by_region(regions, 2014, 'de21') >>> int(cb.sum()['electricity']) == int(cb_orig.sum()['electricity']) True """ cb = get_transformation_balance(year) if fix is True: cb = fix_transformation_balance(cb) # create empty DataFrame to take the transformation balance for the regions my_index = pd.MultiIndex(levels=[[], [], []], codes=[[], [], []]) cb_new = pd.DataFrame(index=my_index, columns=cb.columns) # Use the number of inhabitants to reshape the balance to the new regions logging.debug( "Fetching inhabitants table to reshape the transformation balance." ) ew = inhabitants.get_share_of_federal_states_by_region(year, regions, name) # Loop over the deflex regions for region in sorted(ew.index.get_level_values(0).unique()): # Get all states that intersects with the current deflex-region states = ew.loc[region].index # Sum up the fraction of each state-table to get the new region table for idx in cb.loc[states[0]].index: cb_new.loc[region, idx[0], idx[1]] = 0 for state in states: share = ew.loc[region, state] cb_new.loc[region, idx[0], idx[1]] += cb.loc[ state, idx[0], idx[1] ] * float(share) return cb_new
if __name__ == "__main__": pass