"""A subpackage to define and organize PUDL database tables by data group."""
import importlib
import pkgutil
from pudl.metadata.helpers import build_foreign_keys
for module_info in pkgutil.iter_modules(__path__):
[docs]
module = importlib.import_module(f"{__name__}.{module_info.name}")
resources = module.RESOURCE_METADATA
RESOURCE_METADATA.update(resources)
[docs]
FOREIGN_KEYS: dict[str, list[dict]] = build_foreign_keys(RESOURCE_METADATA)
"""Generated foreign key constraints by resource name.
See :func:`pudl.metadata.helpers.build_foreign_keys`.
"""
[docs]
ENTITIES: dict[str, dict[str, list[str] | dict[str, str]]] = {
"plants": {
"id_cols": ["plant_id_eia"],
"static_cols": [
"city",
"county",
"latitude",
"longitude",
"plant_name_eia",
"state",
"street_address",
"zip_code",
],
"annual_cols": [
"ash_impoundment",
"ash_impoundment_lined",
"ash_impoundment_status",
"balancing_authority_code_eia",
"balancing_authority_name_eia",
"datum",
"energy_storage",
"ferc_cogen_docket_no",
"ferc_cogen_status",
"ferc_exempt_wholesale_generator_docket_no",
"ferc_exempt_wholesale_generator",
"ferc_small_power_producer_docket_no",
"ferc_small_power_producer",
"ferc_qualifying_facility_docket_no",
"grid_voltage_1_kv",
"grid_voltage_2_kv",
"grid_voltage_3_kv",
"iso_rto_code",
"liquefied_natural_gas_storage",
"natural_gas_local_distribution_company",
"natural_gas_storage",
"natural_gas_pipeline_name_1",
"natural_gas_pipeline_name_2",
"natural_gas_pipeline_name_3",
"nerc_region",
"has_net_metering",
"pipeline_notes",
"primary_purpose_id_naics",
"regulatory_status_code",
"reporting_frequency_code",
"sector_id_eia",
"sector_name_eia",
"service_area",
"transmission_distribution_owner_id",
"transmission_distribution_owner_name",
"transmission_distribution_owner_state",
"utility_id_eia",
"water_source",
"data_maturity",
],
"mapped_schemas": [
{"operator_utility_id_eia": "utility_id_eia"},
{"plant_id_eia_direct_support_1": "plant_id_eia"},
{"plant_id_eia_direct_support_2": "plant_id_eia"},
{"plant_id_eia_direct_support_3": "plant_id_eia"},
],
},
"generators": {
"id_cols": ["plant_id_eia", "generator_id"],
"static_cols": [
"duct_burners",
"generator_operating_date",
"topping_bottoming_code",
"solid_fuel_gasification",
"pulverized_coal_tech",
"fluidized_bed_tech",
"subcritical_tech",
"supercritical_tech",
"ultrasupercritical_tech",
"stoker_tech",
"other_combustion_tech",
"bypass_heat_recovery",
"rto_iso_lmp_node_id",
"rto_iso_location_wholesale_reporting_id",
"associated_combined_heat_power",
"original_planned_generator_operating_date",
"operating_switch",
"previously_canceled",
],
"annual_cols": [
"capacity_mw",
"fuel_type_code_pudl",
"multiple_fuels",
"ownership_code",
"owned_by_non_utility",
"deliver_power_transgrid",
"summer_capacity_mw",
"winter_capacity_mw",
"summer_capacity_estimate",
"winter_capacity_estimate",
"minimum_load_mw",
"distributed_generation",
"technology_description",
"reactive_power_output_mvar",
"prime_mover_code",
"energy_source_code_1",
"energy_source_code_2",
"energy_source_code_3",
"energy_source_code_4",
"energy_source_code_5",
"energy_source_code_6",
"energy_source_1_transport_1",
"energy_source_1_transport_2",
"energy_source_1_transport_3",
"energy_source_2_transport_1",
"energy_source_2_transport_2",
"energy_source_2_transport_3",
"startup_source_code_1",
"startup_source_code_2",
"startup_source_code_3",
"startup_source_code_4",
"time_cold_shutdown_full_load_code",
"syncronized_transmission_grid",
"turbines_num",
"operational_status_code",
"operational_status",
"planned_modifications",
"planned_net_summer_capacity_uprate_mw",
"planned_net_winter_capacity_uprate_mw",
"planned_new_capacity_mw",
"planned_uprate_date",
"planned_net_summer_capacity_derate_mw",
"planned_net_winter_capacity_derate_mw",
"planned_derate_date",
"planned_new_prime_mover_code",
"planned_energy_source_code_1",
"planned_repower_date",
"other_planned_modifications",
"other_modifications_date",
"planned_generator_retirement_date",
"carbon_capture",
"cofire_fuels",
"switch_oil_gas",
"turbines_inverters_hydrokinetics",
"nameplate_power_factor",
"uprate_derate_during_year",
"uprate_derate_completed_date",
"current_planned_generator_operating_date",
"summer_estimated_capability_mw",
"winter_estimated_capability_mw",
"generator_retirement_date",
"utility_id_eia",
"ferc_qualifying_facility",
"data_maturity",
"energy_storage_capacity_mwh",
"net_capacity_mwdc",
],
"mapped_schemas": [
{"operator_utility_id_eia": "utility_id_eia"},
{
"plant_id_eia_direct_support_1": "plant_id_eia",
"generator_id_direct_support_1": "generator_id",
},
{
"plant_id_eia_direct_support_2": "plant_id_eia",
"generator_id_direct_support_2": "generator_id",
},
{
"plant_id_eia_direct_support_3": "plant_id_eia",
"generator_id_direct_support_3": "generator_id",
},
],
},
"boilers": {
"id_cols": ["plant_id_eia", "boiler_id"],
"static_cols": [
"boiler_manufacturer",
"boiler_manufacturer_code",
],
"annual_cols": [
"boiler_operating_date",
"boiler_status",
"boiler_retirement_date",
"boiler_type",
"firing_type_1",
"firing_type_2",
"firing_type_3",
"firing_rate_using_coal_tons_per_hour",
"firing_rate_using_oil_bbls_per_hour",
"firing_rate_using_gas_mcf_per_hour",
"firing_rate_using_other_fuels",
"boiler_fuel_code_1",
"boiler_fuel_code_2",
"boiler_fuel_code_3",
"boiler_fuel_code_4",
"waste_heat_input_mmbtu_per_hour",
"wet_dry_bottom",
"fly_ash_reinjection",
"hrsg",
"max_steam_flow_1000_lbs_per_hour",
"turndown_ratio",
"efficiency_100pct_load",
"efficiency_50pct_load",
"air_flow_100pct_load_cubic_feet_per_minute",
"new_source_review",
"new_source_review_date",
"new_source_review_permit",
"regulation_particulate",
"regulation_so2",
"regulation_nox",
"standard_particulate_rate",
"standard_so2_rate",
"standard_nox_rate",
"unit_particulate",
"unit_so2",
"unit_nox",
"compliance_year_particulate",
"compliance_year_nox",
"compliance_year_so2",
"particulate_control_out_of_compliance_strategy_1",
"particulate_control_out_of_compliance_strategy_2",
"particulate_control_out_of_compliance_strategy_3",
"so2_control_out_of_compliance_strategy_1",
"so2_control_out_of_compliance_strategy_2",
"so2_control_out_of_compliance_strategy_3",
"so2_control_existing_caaa_compliance_strategy_1",
"so2_control_existing_caaa_compliance_strategy_2",
"so2_control_existing_caaa_compliance_strategy_3",
"so2_control_planned_caaa_compliance_strategy_1",
"so2_control_planned_caaa_compliance_strategy_2",
"so2_control_planned_caaa_compliance_strategy_3",
"nox_control_out_of_compliance_strategy_1",
"nox_control_out_of_compliance_strategy_2",
"nox_control_out_of_compliance_strategy_3",
"nox_control_existing_caaa_compliance_strategy_1",
"nox_control_existing_caaa_compliance_strategy_2",
"nox_control_existing_caaa_compliance_strategy_3",
"nox_control_planned_caaa_compliance_strategy_1",
"nox_control_planned_caaa_compliance_strategy_2",
"nox_control_planned_caaa_compliance_strategy_3",
"compliance_year_mercury",
"mercury_control_existing_strategy_1",
"mercury_control_existing_strategy_2",
"mercury_control_existing_strategy_3",
"mercury_control_existing_strategy_4",
"mercury_control_existing_strategy_5",
"mercury_control_existing_strategy_6",
"mercury_control_proposed_strategy_1",
"mercury_control_proposed_strategy_2",
"mercury_control_proposed_strategy_3",
"nox_control_existing_strategy_1",
"nox_control_existing_strategy_2",
"nox_control_existing_strategy_3",
"nox_control_manufacturer",
"nox_control_manufacturer_code",
"nox_control_proposed_strategy_1",
"nox_control_proposed_strategy_2",
"nox_control_proposed_strategy_3",
"nox_control_status_code",
"regulation_mercury",
"so2_control_existing_strategy_1",
"so2_control_existing_strategy_2",
"so2_control_existing_strategy_3",
"so2_control_proposed_strategy_1",
"so2_control_proposed_strategy_2",
"so2_control_proposed_strategy_3",
"standard_so2_percent_scrubbed",
"data_maturity",
],
},
"utilities": {
"id_cols": ["utility_id_eia"],
"static_cols": ["utility_name_eia"],
"annual_cols": [
"street_address",
"city",
"state",
"zip_code",
"entity_type",
"plants_reported_owner",
"plants_reported_operator",
"plants_reported_asset_manager",
"plants_reported_other_relationship",
"attention_line",
"address_2",
"zip_code_4",
"contact_firstname",
"contact_lastname",
"contact_title",
"contact_firstname_2",
"contact_lastname_2",
"contact_title_2",
"phone_extension",
"phone_extension_2",
"phone_number",
"phone_number_2",
"data_maturity",
],
"mapped_schemas": [
{
"owner_utility_id_eia": "utility_id_eia",
"owner_utility_name_eia": "utility_name_eia",
"owner_state": "state",
"owner_country": "country",
"owner_street_address": "street_address",
"owner_zip_code": "zip_code",
"owner_city": "city",
},
{
"operator_utility_id_eia": "utility_id_eia",
"operator_utility_name_eia": "utility_name_eia",
"operator_state": "state",
},
],
},
}
"""Columns kept for either entity or annual EIA tables in the harvesting process.
For each entity type (key), the ID columns, static columns, annual columns, and mapped
columns.
The order of the entities matters. Plants must be harvested before utilities, since
plant location must be removed before the utility locations are harvested.
``mapped_schemas`` allows for harvesting an entity ID / value relationship
from multiple columns in the same input dataframe. Each item in ``mapped_schemas`` is a
dictionary mapping column names in one of the cleaned tables to the standard column names
for that entity. This is useful if a table has entities that should be harvested,
but whose column names don't have the same name as those in the ``id_cols``, ``static_cols``,
or ``annual_cols`` list. For example, in the ownership table the owner and operator utility
columns map to different column names in the other tables, i.e. "owner_utility_id_eia": "utility_id_eia".
In the harvesting process, a copy of the clean dataframe is made, and these columns are
renamed so the relationship can be harvested and added to the normalized entity tables.
"""