"""Denormalized versions of the EIA 860 tables."""
import pandas as pd
from dagster import asset
import pudl
from pudl.metadata.codes import CODE_METADATA
@asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python")
[docs]
def denorm_ownership_eia860(
denorm_plants_utilities_eia: pd.DataFrame,
utilities_eia: pd.DataFrame,
ownership_eia860: pd.DataFrame,
) -> pd.DataFrame:
"""A denormalized version of the EIA 860 ownership table.
TODO: Convert to SQL view?
Args:
denorm_plants_utilities_eia: Denormalized table containing plant and utility
names and IDs.
utilities_eia: Table of associations between EIA utility IDs and
PUDL Utility IDs.
ownership_eia860: EIA 860 ownership table.
Returns:
A denormalized version of the EIA 860 ownership table.
"""
pu_df = denorm_plants_utilities_eia.loc[
:,
[
"plant_id_eia",
"plant_id_pudl",
"plant_name_eia",
"report_date",
],
]
own_df = pd.merge(
ownership_eia860, pu_df, on=["report_date", "plant_id_eia"], how="left"
).dropna(
subset=["report_date", "plant_id_eia", "generator_id", "owner_utility_id_eia"]
)
util_df = utilities_eia.loc[:, ["utility_id_eia", "utility_id_pudl"]]
own_df = own_df.merge(
util_df, how="left", left_on="owner_utility_id_eia", right_on="utility_id_eia"
)
first_cols = [
"report_date",
"plant_id_eia",
"plant_id_pudl",
"plant_name_eia",
"owner_utility_id_eia",
"utility_id_pudl",
"owner_utility_name_eia",
"generator_id",
]
# Re-arrange the columns for easier readability:
return pudl.helpers.organize_cols(own_df, first_cols)
@asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python")
[docs]
def denorm_emissions_control_equipment_eia860(
emissions_control_equipment_eia860: pd.DataFrame,
denorm_plants_utilities_eia: pd.DataFrame,
) -> pd.DataFrame:
"""A denormalized version of the EIA 860 emission control equipment table.
Args:
emissions_control_equipment_eia860: EIA 860 emissions control equipment table.
denorm_plants_utilities_eia: Denormalized table containing plant and utility
names and IDs.
Returns:
A denormalized version of the EIA 860 emissions control equipment table.
"""
pu_df = denorm_plants_utilities_eia.loc[
:,
[
"plant_id_eia",
"plant_id_pudl",
"plant_name_eia",
"utility_id_eia",
"utility_id_pudl",
"utility_name_eia",
"report_date",
],
]
pu_df = pu_df.assign(report_year=lambda x: x.report_date.dt.year).drop(
columns=["report_date"]
)
emce_df = pd.merge(
emissions_control_equipment_eia860,
pu_df,
on=["report_year", "plant_id_eia"],
how="left",
)
# Add a column for operational status
emce_df["operational_status"] = emce_df.operational_status_code.str.upper().map(
pudl.helpers.label_map(
CODE_METADATA["operational_status_eia"]["df"],
from_col="code",
to_col="operational_status",
null_value=pd.NA,
)
)
return emce_df