"""Routines used for extracting the raw FERC 714 data."""
from collections import OrderedDict
import pandas as pd
from dagster import AssetOut, Output, multi_asset
import pudl
# Module-level logger, obtained through PUDL's logging helper and named after
# this module via ``__name__``.
logger = pudl.logging_helpers.get_logger(__name__)
# Each key is a short table name; each value records the CSV file holding that
# table ("name") and the character encoding to use when reading it
# ("encoding"). Only two encodings occur: "iso-8859-1" and "utf-8".
# The keys are also used to derive the ``raw_ferc714__<table_name>`` asset
# output names, so renaming a key renames the corresponding asset.
FERC714_FILES: OrderedDict[str, dict[str, str]] = OrderedDict(
    {
        "id_certification": {
            "name": "Part 1 Schedule 1 - Identification Certification.csv",
            "encoding": "iso-8859-1",
        },
        "gen_plants_ba": {
            "name": "Part 2 Schedule 1 - Balancing Authority Generating Plants.csv",
            "encoding": "iso-8859-1",
        },
        "demand_monthly_ba": {
            "name": "Part 2 Schedule 2 - Balancing Authority Monthly Demand.csv",
            "encoding": "utf-8",
        },
        "net_energy_load_ba": {
            "name": "Part 2 Schedule 3 - Balancing Authority Net Energy for Load.csv",
            "encoding": "utf-8",
        },
        "adjacency_ba": {
            "name": "Part 2 Schedule 4 - Adjacent Balancing Authorities.csv",
            "encoding": "iso-8859-1",
        },
        "interchange_ba": {
            "name": "Part 2 Schedule 5 - Balancing Authority Interchange.csv",
            "encoding": "iso-8859-1",
        },
        "lambda_hourly_ba": {
            "name": "Part 2 Schedule 6 - Balancing Authority Hourly System Lambda.csv",
            "encoding": "utf-8",
        },
        "lambda_description": {
            "name": "Part 2 Schedule 6 - System Lambda Description.csv",
            "encoding": "iso-8859-1",
        },
        "description_pa": {
            "name": "Part 3 Schedule 1 - Planning Area Description.csv",
            "encoding": "iso-8859-1",
        },
        "demand_forecast_pa": {
            "name": "Part 3 Schedule 2 - Planning Area Forecast Demand.csv",
            "encoding": "utf-8",
        },
        "demand_hourly_pa": {
            "name": "Part 3 Schedule 2 - Planning Area Hourly Demand.csv",
            "encoding": "utf-8",
        },
        "respondent_id": {
            "name": "Respondent IDs.csv",
            "encoding": "utf-8",
        },
    }
)
"""Dictionary mapping PUDL tables to FERC-714 filenames and character encodings."""
@multi_asset(
outs={"raw_ferc714__" + table_name: AssetOut() for table_name in FERC714_FILES},
required_resource_keys={"datastore", "dataset_settings"},
)