Source code for pudl.constants

"""
A warehouse for constant values required to initilize the PUDL Database.

This constants module stores and organizes a bunch of constant values which are
used throughout PUDL to populate static lists within the data packages or for
data cleaning purposes.
"""

from typing import Any, Dict, List, Tuple, TypedDict

import pandas as pd

from pudl.metadata.enums import (CUSTOMER_CLASSES, EPACEMS_MEASUREMENT_CODES,
                                 EPACEMS_STATES, FUEL_CLASSES, NERC_REGIONS,
                                 RELIABILITY_STANDARDS, REVENUE_CLASSES,
                                 TECH_CLASSES)
from pudl.metadata.labels import (COALMINE_TYPES_EIA, ENTITY_TYPES,
                                  ESTIMATED_OR_ACTUAL, MOMENTARY_INTERRUPTIONS)

[docs]ENTITIES: Dict[str, Tuple[List[str], List[str], List[str], Dict[str, str]]] = { 'plants': ( # base cols ['plant_id_eia'], # static cols ['balancing_authority_code_eia', 'balancing_authority_name_eia', 'city', 'county', 'ferc_cogen_status', 'ferc_exempt_wholesale_generator', 'ferc_small_power_producer', 'grid_voltage_2_kv', 'grid_voltage_3_kv', 'grid_voltage_kv', 'iso_rto_code', 'latitude', 'longitude', 'plant_name_eia', 'primary_purpose_id_naics', 'sector_id_eia', 'sector_name_eia', 'state', 'street_address', 'zip_code'], # annual cols ['ash_impoundment', 'ash_impoundment_lined', 'ash_impoundment_status', 'datum', 'energy_storage', 'ferc_cogen_docket_no', 'water_source', 'ferc_exempt_wholesale_generator_docket_no', 'ferc_small_power_producer_docket_no', 'liquefied_natural_gas_storage', 'natural_gas_local_distribution_company', 'natural_gas_storage', 'natural_gas_pipeline_name_1', 'natural_gas_pipeline_name_2', 'natural_gas_pipeline_name_3', 'nerc_region', 'net_metering', 'pipeline_notes', 'regulatory_status_code', 'respondent_frequency', 'service_area', 'transmission_distribution_owner_id', 'transmission_distribution_owner_name', 'transmission_distribution_owner_state', 'utility_id_eia'], # need type fixing {}, ), 'generators': ( # base cols ['plant_id_eia', 'generator_id'], # static cols ['prime_mover_code', 'duct_burners', 'operating_date', 'topping_bottoming_code', 'solid_fuel_gasification', 'pulverized_coal_tech', 'fluidized_bed_tech', 'subcritical_tech', 'supercritical_tech', 'ultrasupercritical_tech', 'stoker_tech', 'other_combustion_tech', 'bypass_heat_recovery', 'rto_iso_lmp_node_id', 'rto_iso_location_wholesale_reporting_id', 'associated_combined_heat_power', 'original_planned_operating_date', 'operating_switch', 'previously_canceled'], # annual cols ['capacity_mw', 'fuel_type_code_pudl', 'multiple_fuels', 'ownership_code', 'owned_by_non_utility', 'deliver_power_transgrid', 'summer_capacity_mw', 'winter_capacity_mw', 'summer_capacity_estimate', 'winter_capacity_estimate', 'minimum_load_mw', 'distributed_generation', 'technology_description', 'reactive_power_output_mvar', 'energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_1_transport_1', 'energy_source_1_transport_2', 'energy_source_1_transport_3', 'energy_source_2_transport_1', 'energy_source_2_transport_2', 'energy_source_2_transport_3', 'startup_source_code_1', 'startup_source_code_2', 'startup_source_code_3', 'startup_source_code_4', 'time_cold_shutdown_full_load_code', 'syncronized_transmission_grid', 'turbines_num', 'operational_status_code', 'operational_status', 'planned_modifications', 'planned_net_summer_capacity_uprate_mw', 'planned_net_winter_capacity_uprate_mw', 'planned_new_capacity_mw', 'planned_uprate_date', 'planned_net_summer_capacity_derate_mw', 'planned_net_winter_capacity_derate_mw', 'planned_derate_date', 'planned_new_prime_mover_code', 'planned_energy_source_code_1', 'planned_repower_date', 'other_planned_modifications', 'other_modifications_date', 'planned_retirement_date', 'carbon_capture', 'cofire_fuels', 'switch_oil_gas', 'turbines_inverters_hydrokinetics', 'nameplate_power_factor', 'uprate_derate_during_year', 'uprate_derate_completed_date', 'current_planned_operating_date', 'summer_estimated_capability_mw', 'winter_estimated_capability_mw', 'retirement_date', 'utility_id_eia', 'data_source'], # need type fixing {} ), # utilities must come after plants. plant location needs to be # removed before the utility locations are compiled 'utilities': ( # base cols ['utility_id_eia'], # static cols ['utility_name_eia'], # annual cols ['street_address', 'city', 'state', 'zip_code', 'entity_type', 'plants_reported_owner', 'plants_reported_operator', 'plants_reported_asset_manager', 'plants_reported_other_relationship', 'attention_line', 'address_2', 'zip_code_4', 'contact_firstname', 'contact_lastname', 'contact_title', 'contact_firstname_2', 'contact_lastname_2', 'contact_title_2', 'phone_extension', 'phone_extension_2', 'phone_number', 'phone_number_2'], # need type fixing {'utility_id_eia': 'int64'}, ), 'boilers': ( # base cols ['plant_id_eia', 'boiler_id'], # static cols ['prime_mover_code'], # annual cols [], # need type fixing {},
) } """ Columns kept for either entity or annual EIA tables in the harvesting process. For each entity type (key), the ID columns, static columns, and annual columns, followed by any custom data type fixes. The order of the entities matters. Plants must be harvested before utilities, since plant location must be removed before the utility locations are harvested. """
[docs]class Partition(TypedDict, total=False): """Data partition."""
[docs] years: Tuple[int, ...]
[docs] year_month: str
[docs] states: Tuple[str, ...]
[docs]WORKING_PARTITIONS: Dict[str, Partition] = { 'eia860': { 'years': tuple(range(2001, 2021)) }, 'eia860m': { 'year_month': '2021-08' }, 'eia861': { 'years': tuple(range(2001, 2021)) }, 'eia923': { 'years': tuple(range(2001, 2021)) }, 'epacems': { 'years': tuple(range(1995, 2021)), 'states': tuple(EPACEMS_STATES), }, 'ferc1': { 'years': tuple(range(1994, 2021)) }, 'ferc714': {},
} """ Per-dataset descriptions of what raw input data partitions can be processed. Most of our datasets are distributed in chunks that correspond to a given year, state, or other logical partition. Not all available partitions of the raw have data have been integrated into PUDL. The sub-keys within each dataset partition dictionary refer to metadata in the data packages we have archived on Zenodo, which contain the original raw input data. Note: ferc714 is not partitioned by year and is available only as a single file containing all data. """
[docs]PUDL_TABLES: Dict[str, Tuple[str]] = { 'eia860': ( 'boiler_generator_assn_eia860', 'utilities_eia860', 'plants_eia860', 'generators_eia860', 'ownership_eia860', ), 'eia861': ( "service_territory_eia861", "balancing_authority_eia861", "sales_eia861", "advanced_metering_infrastructure_eia861", "demand_response_eia861", "demand_side_management_eia861", "distributed_generation_eia861", "distribution_systems_eia861", "dynamic_pricing_eia861", "energy_efficiency_eia861", "green_pricing_eia861", "mergers_eia861", "net_metering_eia861", "non_net_metering_eia861", "operational_data_eia861", "reliability_eia861", "utility_data_eia861", ), 'eia923': ( 'generation_fuel_eia923', 'boiler_fuel_eia923', 'generation_eia923', 'coalmine_eia923', 'fuel_receipts_costs_eia923', ), 'entity_tables': ( 'utilities_entity_eia', 'plants_entity_eia', 'generators_entity_eia', 'boilers_entity_eia', ), 'epacems': ( "hourly_emissions_epacems", ), 'ferc1': ( 'fuel_ferc1', 'plants_steam_ferc1', 'plants_small_ferc1', 'plants_hydro_ferc1', 'plants_pumped_storage_ferc1', 'purchased_power_ferc1', 'plant_in_service_ferc1', ), 'ferc714': ( "respondent_id_ferc714", "id_certification_ferc714", "gen_plants_ba_ferc714", "demand_monthly_ba_ferc714", "net_energy_load_ba_ferc714", "adjacency_ba_ferc714", "interchange_ba_ferc714", "lambda_hourly_ba_ferc714", "lambda_description_ferc714", "description_pa_ferc714", "demand_forecast_pa_ferc714", "demand_hourly_pa_ferc714", ), 'glue': ( 'plants_eia', 'plants_ferc', 'plants', 'utilities_eia', 'utilities_ferc', 'utilities', 'utility_plant_assn',
) } """Core PUDL DB tables by data source. Used to validate ETL inputs."""
[docs]COLUMN_DTYPES: Dict[str, Dict[str, Any]] = { "ferc1": { # Obviously this is not yet a complete list... "construction_year": pd.Int64Dtype(), "installation_year": pd.Int64Dtype(), "plant_id_ferc1": pd.Int64Dtype(), "plant_id_pudl": pd.Int64Dtype(), "report_date": "datetime64[ns]", "report_year": pd.Int64Dtype(), "utility_id_ferc1": pd.Int64Dtype(), "utility_id_pudl": pd.Int64Dtype(), }, "ferc714": { # INCOMPLETE "demand_mwh": float, "demand_annual_mwh": float, "eia_code": pd.Int64Dtype(), "peak_demand_summer_mw": float, "peak_demand_winter_mw": float, "report_date": "datetime64[ns]", "respondent_id_ferc714": pd.Int64Dtype(), "respondent_name_ferc714": pd.StringDtype(), "respondent_type": pd.CategoricalDtype( categories=["utility", "balancing_authority"] ), "timezone": pd.CategoricalDtype( categories=[ "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", "America/Anchorage", "Pacific/Honolulu", ] ), "utc_datetime": "datetime64[ns]", }, "epacems": { 'state': pd.CategoricalDtype(categories=EPACEMS_STATES), 'plant_id_eia': "int32", 'unitid': pd.StringDtype(), 'operating_datetime_utc': "datetime64[ns]", 'operating_time_hours': "float32", 'gross_load_mw': "float32", 'steam_load_1000_lbs': "float32", 'so2_mass_lbs': "float32", 'so2_mass_measurement_code': pd.CategoricalDtype( categories=EPACEMS_MEASUREMENT_CODES ), 'nox_rate_lbs_mmbtu': "float32", 'nox_rate_measurement_code': pd.CategoricalDtype( categories=EPACEMS_MEASUREMENT_CODES ), 'nox_mass_lbs': "float32", 'nox_mass_measurement_code': pd.CategoricalDtype( categories=EPACEMS_MEASUREMENT_CODES ), 'co2_mass_tons': "float32", 'co2_mass_measurement_code': pd.CategoricalDtype( categories=EPACEMS_MEASUREMENT_CODES ), 'heat_content_mmbtu': "float32", 'facility_id': pd.Int32Dtype(), # Nullable Integer 'unit_id_epa': pd.Int32Dtype(), # Nullable Integer }, "eia": { 'actual_peak_demand_savings_mw': float, # Added by AES for DR table 'address_2': pd.StringDtype(), # Added by AES for 860 utilities table 'advanced_metering_infrastructure': pd.Int64Dtype(), # Added by AES for AMI table # Added by AES for UD misc table 'alternative_fuel_vehicle_2_activity': pd.BooleanDtype(), 'alternative_fuel_vehicle_activity': pd.BooleanDtype(), 'annual_indirect_program_cost': float, 'annual_total_cost': float, 'ash_content_pct': float, 'ash_impoundment': pd.BooleanDtype(), 'ash_impoundment_lined': pd.BooleanDtype(), # TODO: convert this field to more descriptive words 'ash_impoundment_status': pd.StringDtype(), 'associated_combined_heat_power': pd.BooleanDtype(), 'attention_line': pd.StringDtype(), 'automated_meter_reading': pd.Int64Dtype(), # Added by AES for AMI table 'backup_capacity_mw': float, # Added by AES for NNM & DG misc table 'balancing_authority_code_eia': pd.CategoricalDtype(), 'balancing_authority_id_eia': pd.Int64Dtype(), 'balancing_authority_name_eia': pd.StringDtype(), 'bga_source': pd.StringDtype(), 'boiler_id': pd.StringDtype(), 'bunded_activity': pd.BooleanDtype(), 'business_model': pd.CategoricalDtype( categories=["retail", "energy_services"] ), 'buy_distribution_activity': pd.BooleanDtype(), 'buying_transmission_activity': pd.BooleanDtype(), 'bypass_heat_recovery': pd.BooleanDtype(), 'caidi_w_major_event_days_minus_loss_of_service_minutes': float, 'caidi_w_major_event_dats_minutes': float, 'caidi_wo_major_event_days_minutes': float, 'capacity_mw': float, 'carbon_capture': pd.BooleanDtype(), 'chlorine_content_ppm': float, 'circuits_with_voltage_optimization': pd.Int64Dtype(), 'city': pd.StringDtype(), 'cofire_fuels': pd.BooleanDtype(), 'consumed_by_facility_mwh': float, 'consumed_by_respondent_without_charge_mwh': float, 'contact_firstname': pd.StringDtype(), 'contact_firstname_2': pd.StringDtype(), 'contact_lastname': pd.StringDtype(), 'contact_lastname_2': pd.StringDtype(), 'contact_title': pd.StringDtype(), 'contact_title_2': pd.StringDtype(), 'contract_expiration_date': 'datetime64[ns]', 'contract_type_code': pd.StringDtype(), 'county': pd.StringDtype(), 'county_id_fips': pd.StringDtype(), # Must preserve leading zeroes 'credits_or_adjustments': float, 'critical_peak_pricing': pd.BooleanDtype(), 'critical_peak_rebate': pd.BooleanDtype(), 'current_planned_operating_date': 'datetime64[ns]', 'customers': float, 'customer_class': pd.CategoricalDtype(categories=CUSTOMER_CLASSES), 'customer_incentives_cost': float, 'customer_incentives_incremental_cost': float, 'customer_incentives_incremental_life_cycle_cost': float, 'customer_other_costs_incremental_life_cycle_cost': float, 'daily_digital_access_customers': pd.Int64Dtype(), 'data_observed': pd.BooleanDtype(), 'datum': pd.StringDtype(), 'deliver_power_transgrid': pd.BooleanDtype(), 'delivery_customers': float, 'direct_load_control_customers': pd.Int64Dtype(), 'distributed_generation': pd.BooleanDtype(), 'distributed_generation_owned_capacity_mw': float, 'distribution_activity': pd.BooleanDtype(), 'distribution_circuits': pd.Int64Dtype(), 'duct_burners': pd.BooleanDtype(), 'energy_displaced_mwh': float, 'energy_efficiency_annual_cost': float, 'energy_efficiency_annual_actual_peak_reduction_mw': float, 'energy_efficiency_annual_effects_mwh': float, 'energy_efficiency_annual_incentive_payment': float, 'energy_efficiency_incremental_actual_peak_reduction_mw': float, 'energy_efficiency_incremental_effects_mwh': float, 'energy_savings_estimates_independently_verified': pd.BooleanDtype(), 'energy_savings_independently_verified': pd.BooleanDtype(), 'energy_savings_mwh': float, 'energy_served_ami_mwh': float, 'energy_source_1_transport_1': pd.StringDtype(), 'energy_source_1_transport_2': pd.StringDtype(), 'energy_source_1_transport_3': pd.StringDtype(), 'energy_source_2_transport_1': pd.StringDtype(), 'energy_source_2_transport_2': pd.StringDtype(), 'energy_source_2_transport_3': pd.StringDtype(), 'energy_source_code': pd.StringDtype(), 'energy_source_code_1': pd.StringDtype(), 'energy_source_code_2': pd.StringDtype(), 'energy_source_code_3': pd.StringDtype(), 'energy_source_code_4': pd.StringDtype(), 'energy_source_code_5': pd.StringDtype(), 'energy_source_code_6': pd.StringDtype(), 'energy_storage': pd.BooleanDtype(), 'entity_type': pd.CategoricalDtype(categories=ENTITY_TYPES.values()), 'estimated_or_actual_capacity_data': pd.CategoricalDtype( categories=ESTIMATED_OR_ACTUAL.values() ), 'estimated_or_actual_fuel_data': pd.CategoricalDtype( categories=ESTIMATED_OR_ACTUAL.values() ), 'estimated_or_actual_tech_data': pd.CategoricalDtype( categories=ESTIMATED_OR_ACTUAL.values() ), 'exchange_energy_delivered_mwh': float, 'exchange_energy_recieved_mwh': float, 'ferc_cogen_docket_no': pd.StringDtype(), 'ferc_cogen_status': pd.BooleanDtype(), 'ferc_exempt_wholesale_generator': pd.BooleanDtype(), 'ferc_exempt_wholesale_generator_docket_no': pd.StringDtype(), 'ferc_small_power_producer': pd.BooleanDtype(), 'ferc_small_power_producer_docket_no': pd.StringDtype(), 'fluidized_bed_tech': pd.BooleanDtype(), 'fraction_owned': float, 'fuel_class': pd.CategoricalDtype(categories=FUEL_CLASSES), 'fuel_consumed_for_electricity_mmbtu': float, 'fuel_consumed_for_electricity_units': float, 'fuel_consumed_mmbtu': float, 'fuel_consumed_units': float, 'fuel_cost_per_mmbtu': float, 'fuel_group_code': pd.StringDtype(), 'fuel_group_code_simple': pd.StringDtype(), 'fuel_mmbtu_per_unit': float, 'fuel_pct': float, 'fuel_received_units': float, 'fuel_type_code_aer': pd.StringDtype(), 'fuel_type_code_pudl': pd.StringDtype(), 'furnished_without_charge_mwh': float, 'generation_activity': pd.BooleanDtype(), # this is a mix of integer-like values (2 or 5) and strings like AUGSF 'generator_id': pd.StringDtype(), 'generators_number': float, 'generators_num_less_1_mw': float, 'green_pricing_revenue': float, 'grid_voltage_2_kv': float, 'grid_voltage_3_kv': float, 'grid_voltage_kv': float, 'highest_distribution_voltage_kv': float, 'home_area_network': pd.Int64Dtype(), 'inactive_accounts_included': pd.BooleanDtype(), 'incremental_energy_savings_mwh': float, 'incremental_life_cycle_energy_savings_mwh': float, 'incremental_life_cycle_peak_reduction_mwh': float, 'incremental_peak_reduction_mw': float, 'iso_rto_code': pd.StringDtype(), 'latitude': float, 'liquefied_natural_gas_storage': pd.BooleanDtype(), 'load_management_annual_cost': float, 'load_management_annual_actual_peak_reduction_mw': float, 'load_management_annual_effects_mwh': float, 'load_management_annual_incentive_payment': float, 'load_management_annual_potential_peak_reduction_mw': float, 'load_management_incremental_actual_peak_reduction_mw': float, 'load_management_incremental_effects_mwh': float, 'load_management_incremental_potential_peak_reduction_mw': float, 'longitude': float, 'major_program_changes': pd.BooleanDtype(), 'mercury_content_ppm': float, 'merge_address': pd.StringDtype(), 'merge_city': pd.StringDtype(), 'merge_company': pd.StringDtype(), 'merge_date': 'datetime64[ns]', 'merge_state': pd.StringDtype(), 'mine_id_msha': pd.Int64Dtype(), 'mine_id_pudl': pd.Int64Dtype(), 'mine_name': pd.StringDtype(), 'mine_type': pd.CategoricalDtype( categories=COALMINE_TYPES_EIA.values() ), 'minimum_load_mw': float, 'moisture_content_pct': float, 'momentary_interruption_definition': pd.CategoricalDtype( categories=MOMENTARY_INTERRUPTIONS.values() ), 'multiple_fuels': pd.BooleanDtype(), 'nameplate_power_factor': float, 'natural_gas_delivery_contract_type_code': pd.StringDtype(), 'natural_gas_local_distribution_company': pd.StringDtype(), 'natural_gas_pipeline_name_1': pd.StringDtype(), 'natural_gas_pipeline_name_2': pd.StringDtype(), 'natural_gas_pipeline_name_3': pd.StringDtype(), 'natural_gas_storage': pd.BooleanDtype(), 'natural_gas_transport_code': pd.StringDtype(), 'nerc_region': pd.CategoricalDtype(categories=NERC_REGIONS), 'nerc_regions_of_operation': pd.CategoricalDtype(categories=NERC_REGIONS), 'net_generation_mwh': float, 'net_metering': pd.BooleanDtype(), 'net_power_exchanged_mwh': float, 'net_wheeled_power_mwh': float, 'new_parent': pd.StringDtype(), 'non_amr_ami': pd.Int64Dtype(), 'nuclear_unit_id': pd.StringDtype(), 'operates_generating_plant': pd.BooleanDtype(), 'operating_date': 'datetime64[ns]', 'operating_switch': pd.StringDtype(), # TODO: double check this for early 860 years 'operational_status': pd.StringDtype(), 'operational_status_code': pd.StringDtype(), 'original_planned_operating_date': 'datetime64[ns]', 'other': float, 'other_combustion_tech': pd.BooleanDtype(), 'other_costs': float, 'other_costs_incremental_cost': float, 'other_modifications_date': 'datetime64[ns]', 'other_planned_modifications': pd.BooleanDtype(), 'outages_recorded_automatically': pd.BooleanDtype(), 'owned_by_non_utility': pd.BooleanDtype(), 'owner_city': pd.StringDtype(), 'owner_name': pd.StringDtype(), 'owner_state': pd.StringDtype(), 'owner_street_address': pd.StringDtype(), 'owner_utility_id_eia': pd.Int64Dtype(), 'owner_zip_code': pd.StringDtype(), # we should transition these into readable codes, not a one letter thing 'ownership_code': pd.StringDtype(), 'phone_extension': pd.StringDtype(), 'phone_extension_2': pd.StringDtype(), 'phone_number': pd.StringDtype(), 'phone_number_2': pd.StringDtype(), 'pipeline_notes': pd.StringDtype(), 'planned_derate_date': 'datetime64[ns]', 'planned_energy_source_code_1': pd.StringDtype(), 'planned_modifications': pd.BooleanDtype(), 'planned_net_summer_capacity_derate_mw': float, 'planned_net_summer_capacity_uprate_mw': float, 'planned_net_winter_capacity_derate_mw': float, 'planned_net_winter_capacity_uprate_mw': float, 'planned_new_capacity_mw': float, 'planned_new_prime_mover_code': pd.StringDtype(), 'planned_repower_date': 'datetime64[ns]', 'planned_retirement_date': 'datetime64[ns]', 'planned_uprate_date': 'datetime64[ns]', 'plant_id_eia': pd.Int64Dtype(), 'plant_id_epa': pd.Int64Dtype(), 'plant_id_pudl': pd.Int64Dtype(), 'plant_name_eia': pd.StringDtype(), 'plants_reported_asset_manager': pd.BooleanDtype(), 'plants_reported_operator': pd.BooleanDtype(), 'plants_reported_other_relationship': pd.BooleanDtype(), 'plants_reported_owner': pd.BooleanDtype(), 'point_source_unit_id_epa': pd.StringDtype(), 'potential_peak_demand_savings_mw': float, 'pulverized_coal_tech': pd.BooleanDtype(), 'previously_canceled': pd.BooleanDtype(), 'price_responsive_programes': pd.BooleanDtype(), 'price_responsiveness_customers': pd.Int64Dtype(), 'primary_transportation_mode_code': pd.StringDtype(), 'primary_purpose_id_naics': pd.Int64Dtype(), 'prime_mover_code': pd.StringDtype(), 'pv_current_flow_type': pd.CategoricalDtype(categories=['AC', 'DC']), 'reactive_power_output_mvar': float, 'real_time_pricing_program': pd.BooleanDtype(), 'rec_revenue': float, 'rec_sales_mwh': float, 'regulatory_status_code': pd.StringDtype(), 'report_date': 'datetime64[ns]', 'reported_as_another_company': pd.StringDtype(), 'respondent_frequency': pd.CategoricalDtype(categories=["A", "M", "AM"]), 'retail_marketing_activity': pd.BooleanDtype(), 'retail_sales': float, 'retail_sales_mwh': float, 'retirement_date': 'datetime64[ns]', 'revenue_class': pd.CategoricalDtype(categories=REVENUE_CLASSES), 'rto_iso_lmp_node_id': pd.StringDtype(), 'rto_iso_location_wholesale_reporting_id': pd.StringDtype(), 'rtos_of_operation': pd.StringDtype(), 'saidi_w_major_event_dats_minus_loss_of_service_minutes': float, 'saidi_w_major_event_days_minutes': float, 'saidi_wo_major_event_days_minutes': float, 'saifi_w_major_event_days_customers': float, 'saifi_w_major_event_days_minus_loss_of_service_customers': float, 'saifi_wo_major_event_days_customers': float, 'sales_for_resale': float, 'sales_for_resale_mwh': float, 'sales_mwh': float, 'sales_revenue': float, 'sales_to_ultimate_consumers_mwh': float, 'secondary_transportation_mode_code': pd.StringDtype(), 'sector_id_eia': pd.Int64Dtype(), 'sector_name_eia': pd.StringDtype(), 'service_area': pd.StringDtype(), 'service_type': pd.CategoricalDtype( categories=["bundled", "energy", "delivery"] ), 'short_form': pd.BooleanDtype(), 'sold_to_utility_mwh': float, 'solid_fuel_gasification': pd.BooleanDtype(), 'data_source': pd.StringDtype(), 'standard': pd.CategoricalDtype(categories=RELIABILITY_STANDARDS), 'startup_source_code_1': pd.StringDtype(), 'startup_source_code_2': pd.StringDtype(), 'startup_source_code_3': pd.StringDtype(), 'startup_source_code_4': pd.StringDtype(), 'state': pd.StringDtype(), 'state_id_fips': pd.StringDtype(), # Must preserve leading zeroes 'street_address': pd.StringDtype(), 'stoker_tech': pd.BooleanDtype(), 'storage_capacity_mw': float, 'storage_customers': pd.Int64Dtype(), 'subcritical_tech': pd.BooleanDtype(), 'sulfur_content_pct': float, 'summer_capacity_mw': float, 'summer_capacity_estimate': pd.BooleanDtype(), # TODO: check if there is any data pre-2016 'summer_estimated_capability_mw': float, 'summer_peak_demand_mw': float, 'supercritical_tech': pd.BooleanDtype(), 'supplier_name': pd.StringDtype(), 'switch_oil_gas': pd.BooleanDtype(), 'syncronized_transmission_grid': pd.BooleanDtype(), # Added by AES for NM & DG tech table (might want to consider merging with another fuel label) 'tech_class': pd.CategoricalDtype(categories=TECH_CLASSES), 'technology_description': pd.StringDtype(), 'time_cold_shutdown_full_load_code': pd.StringDtype(), 'time_of_use_pricing_program': pd.BooleanDtype(), 'time_responsive_programs': pd.BooleanDtype(), 'time_responsiveness_customers': pd.Int64Dtype(), 'timezone': pd.StringDtype(), 'topping_bottoming_code': pd.StringDtype(), 'total': float, 'total_capacity_less_1_mw': float, 'total_meters': pd.Int64Dtype(), 'total_disposition_mwh': float, 'total_energy_losses_mwh': float, 'total_sources_mwh': float, 'transmission': float, 'transmission_activity': pd.BooleanDtype(), 'transmission_by_other_losses_mwh': float, 'transmission_distribution_owner_id': pd.Int64Dtype(), 'transmission_distribution_owner_name': pd.StringDtype(), 'transmission_distribution_owner_state': pd.StringDtype(), 'turbines_inverters_hydrokinetics': float, 'turbines_num': pd.Int64Dtype(), # TODO: check if any turbines show up pre-2016 'ultrasupercritical_tech': pd.BooleanDtype(), 'unbundled_revenues': float, 'unit_id_eia': pd.StringDtype(), 'unit_id_pudl': pd.Int64Dtype(), 'uprate_derate_completed_date': 'datetime64[ns]', 'uprate_derate_during_year': pd.BooleanDtype(), 'utility_id_eia': pd.Int64Dtype(), 'utility_id_pudl': pd.Int64Dtype(), 'utility_name_eia': pd.StringDtype(), 'utility_owned_capacity_mw': float, # Added by AES for NNM table 'variable_peak_pricing_program': pd.BooleanDtype(), # Added by AES for DP table 'virtual_capacity_mw': float, # Added by AES for NM table 'virtual_customers': pd.Int64Dtype(), # Added by AES for NM table 'water_heater': pd.Int64Dtype(), # Added by AES for DR table 'water_source': pd.StringDtype(), 'weighted_average_life_years': float, 'wheeled_power_delivered_mwh': float, 'wheeled_power_recieved_mwh': float, 'wholesale_marketing_activity': pd.BooleanDtype(), 'wholesale_power_purchases_mwh': float, 'winter_capacity_mw': float, 'winter_capacity_estimate': pd.BooleanDtype(), 'winter_estimated_capability_mw': float, 'winter_peak_demand_mw': float, # 'with_med': float, # 'with_med_minus_los': float, # 'without_med': float, 'zip_code': pd.StringDtype(), 'zip_code_4': pd.StringDtype() }, 'depreciation': { 'utility_id_ferc1': pd.Int64Dtype(), 'utility_id_pudl': pd.Int64Dtype(), 'plant_id_pudl': pd.Int64Dtype(), # 'plant_name': pd.StringDtype(), 'note': pd.StringDtype(), 'report_year': int, 'report_date': 'datetime64[ns]', 'common': pd.BooleanDtype(), 'plant_balance': float, 'book_reserve': float, 'unaccrued_balance': float, 'reserve_pct': float, # 'survivor_curve_type': pd.StringDtype(), 'service_life_avg': float, 'net_salvage_pct': float, 'net_salvage_rate_type_pct': pd.BooleanDtype(), 'net_removal': float, 'net_removal_pct': float, 'remaining_life_avg': float, # 'retirement_date': 'datetime64[ns]', 'depreciation_annual_epxns': float, 'depreciation_annual_pct': float, 'depreciation_annual_rate_type_pct': pd.BooleanDtype(),
# 'data_source': pd.StringDtype(), } }