Source code for pudl.metadata.sources

"""Metadata and operational constants."""
from typing import Any

from pudl.metadata.constants import CONTRIBUTORS, KEYWORDS, LICENSES
from pudl.metadata.enums import EPACEMS_STATES

# Registry of data source metadata, keyed by short source identifier.
#
# Every entry provides "title", "path" (upstream URL), "description",
# "license_raw" and "license_pudl". Most entries also provide some of:
# "source_file_dict", "field_namespace", "contributors", "working_partitions"
# (e.g. years / states / year_month) and "keywords". Keyword lists are
# de-duplicated and sorted so their order is deterministic.
SOURCES: dict[str, Any] = {
    "censusdp1tract": {
        "title": "Census DP1",
        "path": "https://www.census.gov/geographies/mapping-files/2010/geo/tiger-data.html",
        "description": (
            "US Census Demographic Profile 1 (DP1) County and Tract GeoDatabase."
        ),
        "working_partitions": {},  # Census DP1 is monolithic.
        "keywords": sorted(
            {
                "censusdp1tract",
                "census",
            }
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eia860": {
        "title": "EIA Form 860",
        "path": "https://www.eia.gov/electricity/data/eia860",
        "description": (
            "US Energy Information Administration (EIA) Form 860 data for "
            "electric power plants with 1 megawatt or greater combined nameplate "
            "capacity."
        ),
        "source_file_dict": {
            "respondents": "Utilities",
            "records_liberated": "~1 million",
            "source_format": "Microsoft Excel (.xls/.xlsx)",
        },
        "field_namespace": "eia",
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["zane-selvans"],
            CONTRIBUTORS["christina-gosnell"],
            CONTRIBUTORS["steven-winter"],
            CONTRIBUTORS["alana-wilson"],
        ],
        "working_partitions": {
            # A range is already unique and ascending; no set()/sorted() needed.
            "years": list(range(2001, 2022)),
        },
        "keywords": sorted(
            set(
                [
                    "eia860",
                    "form 860",
                    "ownership",
                    "annual",
                    "yearly",
                ]
                + KEYWORDS["eia"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["fuels"]
                + KEYWORDS["plants"]
                + KEYWORDS["environment"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eia860m": {
        "title": "EIA Form 860m",
        "path": "https://www.eia.gov/electricity/data/eia860m",
        "description": (
            "US Energy Information Administration (EIA) Form 860 M data for "
            "electric power plants with 1 megawatt or greater combined nameplate "
            "capacity. Preliminary Monthly Electric Generator Inventory (based on "
            "Form EIA-860M as a supplement to Form EIA-860)"
        ),
        "field_namespace": "eia",
        "contributors": [],
        "working_partitions": {
            "year_month": "2022-09",
        },
        "keywords": sorted(
            set(
                [
                    "eia860m",
                    "form 860m",
                    "monthly",
                ]
                + KEYWORDS["eia"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["fuels"]
                + KEYWORDS["plants"]
                + KEYWORDS["environment"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eia861": {
        "title": "EIA Form 861: Annual Electric Power Industry Report",
        "path": "https://www.eia.gov/electricity/data/eia861",
        "description": (
            "EIA Form 861 Annual Electric Power Industry Report, detailed "
            "data files."
        ),
        "field_namespace": "eia",
        "working_partitions": {
            "years": list(range(2001, 2022)),
        },
        "contributors": [],
        "keywords": sorted(
            set(
                [
                    "eia861",
                    "form 861",
                    "balancing authority",
                    "advanced metering infrastructure",
                    "customer class",
                    "green pricing",
                    "energy efficiency",
                    "demand side management",
                    "demand response",
                    "net metering",
                    "business model",
                    "service territory",
                    "annual",
                    "yearly",
                ]
                + KEYWORDS["eia"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eia923": {
        "title": "EIA Form 923",
        "path": "https://www.eia.gov/electricity/data/eia923",
        "description": (
            "The EIA Form 923 collects detailed monthly and annual electric "
            "power data on electricity generation, fuel consumption, fossil fuel "
            "stocks, and receipts at the power plant and prime mover level."
        ),
        "source_file_dict": {
            "respondents": (
                "Electric, CHP plants, and sometimes fuel transfer terminals with "
                "either 1MW+ or the ability to receive and deliver power to the grid."
            ),
            "records_liberated": "~5 million",
            "source_format": "Microsoft Excel (.xls/.xlsx)",
        },
        "field_namespace": "eia",
        "working_partitions": {
            "years": list(range(2001, 2022)),
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["zane-selvans"],
            CONTRIBUTORS["christina-gosnell"],
            CONTRIBUTORS["steven-winter"],
            CONTRIBUTORS["katherine-lamb"],
        ],
        "keywords": sorted(
            set(
                [
                    "eia923",
                    "form 923",
                    "monthly",
                ]
                + KEYWORDS["eia"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["fuels"]
                + KEYWORDS["plants"]
                + KEYWORDS["environment"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eia_bulk_elec": {
        "title": "EIA Bulk Electricity API Data",
        "path": "https://www.eia.gov/opendata/bulkfiles.php",
        "description": (
            "Aggregate national, state, and plant-level electricity generation "
            "statistics, including fuel quality and consumption, for grid-connected "
            "plants with nameplate capacity of 1 megawatt or greater."
            "\n"
            "At present, PUDL integrates only a few specific data series related to "
            "fuel receipts and costs figures."
        ),
        "source_file_dict": {
            "respondents": (
                "Electric, CHP plants, and sometimes fuel transfer terminals with "
                "either 1MW+ or the ability to receive and deliver power to the grid."
            ),
            "source_format": "JSON",
        },
        "field_namespace": "eia",
        "working_partitions": {},
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["zane-selvans"],
            CONTRIBUTORS["trenton-bush"],
        ],
        "keywords": sorted(
            set(
                KEYWORDS["eia"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["environment"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "eiawater": {
        "title": "EIA Thermoelectric Cooling Water",
        "path": "https://www.eia.gov/electricity/data/water",
        "description": (
            "Monthly cooling water usage by generator and boiler. Data "
            "collected in conjunction with the EIA-860 and EIA-923."
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "epacems": {
        "title": "EPA Hourly Continuous Emission Monitoring System (CEMS)",
        "path": "https://campd.epa.gov/",
        "description": (
            # Trailing space keeps the two sentences from running together
            # once the adjacent literals are concatenated.
            "US EPA hourly Continuous Emissions Monitoring System (CEMS) data. "
            "Hourly CO2, SO2, NOx emissions and gross load."
        ),
        "source_file_dict": {
            "respondents": "Coal and high-sulfur fueled plants over 25MW",
            "records_liberated": "~800 million",
            "source_format": "Comma Separated Value (.csv)",
        },
        "field_namespace": "epacems",
        "working_partitions": {
            "years": list(range(1995, 2022)),
            "states": sorted(EPACEMS_STATES),
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["karl-dunkle-werner"],
            CONTRIBUTORS["zane-selvans"],
        ],
        "keywords": sorted(
            set(
                [
                    "continuous emissions monitoring system",
                    "cems",
                    "air markets program data",
                    "ampd",
                    "hourly",
                ]
                + KEYWORDS["epa"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["environment"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "epacamd_eia": {
        "title": "EPA CAMD to EIA Data Crosswalk",
        "path": "https://github.com/USEPA/camd-eia-crosswalk",
        "description": (
            "A file created collaboratively by EPA and EIA that connects EPA CAMD "
            "smokestacks (units) with corresponding EIA plant part ids reported in "
            "EIA Forms 860 and 923 (plant_id_eia, boiler_id, generator_id). This "
            "one-to-many connection is necessary because pollutants from various plant "
            "parts are collectively emitted and measured from one point-source."
        ),
        "source_file_dict": {
            "records_liberated": "~7000",
            "source_format": "Comma Separated Value (.csv)",
        },
        "field_namespace": "glue",
        "working_partitions": {},
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["austen-sharpe"],
        ],
        "keywords": sorted(
            set(
                [
                    "continuous emissions monitoring system",
                    "cems",
                    "air markets program data",
                    "ampd",
                    "hourly",
                    "eia",
                    "crosswalk",
                ]
                + KEYWORDS["epa"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
                + KEYWORDS["environment"]
                + KEYWORDS["eia"]
                + KEYWORDS["plants"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferc1": {
        "title": "FERC Form 1",
        "path": "https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual",
        "description": (
            "The Federal Energy Regulatory Commission (FERC) Form 1 is a "
            "comprehensive financial and operating report submitted annually for "
            "electric rate regulation, market oversight analysis, and financial audits "
            "by Major electric utilities, licensees and others."
        ),
        "source_file_dict": {
            "respondents": "Major electric utilities and licensees.",
            "records_liberated": "~13.2 million (116 raw tables), ~307,000 (7 clean tables)",
            "source_format": "XBRL (.XBRL) and Visual FoxPro Database (.DBC/.DBF)",
        },
        "field_namespace": "ferc1",
        "working_partitions": {
            "years": list(range(1994, 2022)),
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["zane-selvans"],
            CONTRIBUTORS["christina-gosnell"],
            CONTRIBUTORS["steven-winter"],
            CONTRIBUTORS["alana-wilson"],
            CONTRIBUTORS["austen-sharpe"],
        ],
        "keywords": sorted(
            set(
                [
                    "form 1",
                    "ferc1",
                ]
                + KEYWORDS["ferc"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["finance"]
                + KEYWORDS["electricity"]
                + KEYWORDS["plants"]
                + KEYWORDS["fuels"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferc2": {
        "title": "FERC Form 2",
        "path": "https://www.ferc.gov/industries-data/natural-gas/industry-forms/form-2-2a-3-q-gas-historical-vfp-data",
        "description": (
            "The Federal Energy Regulatory Commission (FERC) Form 2 is a "
            "comprehensive financial and operating report submitted for natural gas "
            "pipelines rate regulation and financial audits."
        ),
        "field_namespace": "ferc2",
        "working_partitions": {
            "years": list(range(2021, 2022)),  # XBRL only
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
        ],
        "keywords": sorted(
            set(
                [
                    "form 2",
                    "ferc2",
                ]
                + KEYWORDS["ferc"]
                + KEYWORDS["finance"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferc6": {
        "title": "FERC Form 6",
        "path": "https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-66-q-overview-orders",
        "description": (
            "The Federal Energy Regulatory Commission (FERC) Form 6 is a "
            "comprehensive financial and operating report submitted for oil "
            "pipelines rate regulation and financial audits."
        ),
        "field_namespace": "ferc6",
        "working_partitions": {
            "years": list(range(2021, 2022)),  # XBRL only
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
        ],
        "keywords": sorted(
            set(
                [
                    "form 6",
                    "ferc6",
                ]
                + KEYWORDS["ferc"]
                + KEYWORDS["finance"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferc60": {
        "title": "FERC Form 60",
        "path": "https://www.ferc.gov/ferc-online/ferc-online/filing-forms/service-companies-filing-forms/form-60-annual-report",
        "description": (
            "The Federal Energy Regulatory Commission (FERC) Form 60 is a "
            "comprehensive financial and operating report submitted for centralized "
            "service companies."
        ),
        "field_namespace": "ferc60",
        "working_partitions": {
            "years": list(range(2021, 2022)),  # XBRL only
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
        ],
        "keywords": sorted(
            set(
                [
                    "form 60",
                    "ferc60",
                ]
                + KEYWORDS["ferc"]
                + KEYWORDS["finance"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferc714": {
        "title": "FERC Form 714 - Annual Electric Balancing Authority Area and Planning Area Report",
        "path": "https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-no-714-annual-electric",
        "description": (
            "Electric transmitting utilities operating balancing authority "
            "areas and planning areas with annual peak demand over 200MW are required "
            "to file Form 714 with the Federal Energy Regulatory Commission (FERC), "
            "reporting balancing authority area generation, actual and scheduled "
            "inter-balancing authority area power transfers, and net energy for load, "
            "summer-winter generation peaks and system lambda."
        ),
        "field_namespace": "ferc714",
        "working_partitions": {
            "years": list(range(2021, 2022)),  # Pre-XBRL data is monolithic
        },
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
        ],
        "keywords": sorted(
            set(
                [
                    "form 714",
                    "ferc714",
                ]
                + KEYWORDS["ferc"]
                + KEYWORDS["us_govt"]
                + KEYWORDS["electricity"]
            )
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "ferceqr": {
        "title": "FERC Form 920: Electric Quarterly Report (EQR)",
        "path": "https://www.ferc.gov/industries-data/electric/power-sales-and-markets/electric-quarterly-reports-eqr",
        "description": (
            "The EQR contains Seller-provided data summarizing contractual terms and "
            "conditions in agreements for all jurisdictional services, including "
            "cost-based sales, market-based rate sales, and transmission service, "
            "as well as transaction information for short-term and long-term "
            "market-based power sales and cost-based power sales."
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "mshamines": {
        "title": "Mine Safety and Health Administration (MSHA) Mines",
        "path": "https://arlweb.msha.gov/OpenGovernmentData/OGIMSHA.asp",
        "description": (
            "The Mine dataset lists all Coal and Metal/Non-Metal mines under MSHA's "
            "jurisdiction. It includes such information as the current status of each "
            "mine (Active, Abandoned, NonProducing, etc.), the current owner and "
            "operating company, commodity codes and physical attributes of the mine."
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "phmsagas": {
        "title": "Pipelines and Hazardous Materials Safety Administration (PHMSA) Annual Natural Gas Report",
        "path": "https://www.phmsa.dot.gov/data-and-statistics/pipeline/gas-distribution-gas-gathering-gas-transmission-hazardous-liquids",
        "description": (
            "Annual reports submitted to PHMSA from gas distribution, gas gathering, "
            "gas transmission, liquefied natural gas, and underground gas storage "
            "system operators. Annual reports include information such as total "
            "pipeline mileage, facilities, commodities transported, miles by material, "
            "and installation dates."
        ),
        "license_raw": LICENSES["us-govt"],
        "license_pudl": LICENSES["cc-by-4.0"],
    },
    "pudl": {
        "title": "The Public Utility Data Liberation (PUDL) Project",
        "path": "https://catalyst.coop/pudl",
        "description": (
            "PUDL is a data processing pipeline created by Catalyst Cooperative that "
            "cleans, integrates, and standardizes some of the most widely used public "
            "energy datasets in the US. The data serve researchers, activists, "
            "journalists, and policy makers that might not have the technical expertise "
            "to access it in its raw form, the time to clean and prepare the data for "
            "bulk analysis, or the means to purchase it from existing commercial "
            "providers."
        ),
        "contributors": [
            CONTRIBUTORS["catalyst-cooperative"],
            CONTRIBUTORS["zane-selvans"],
            CONTRIBUTORS["christina-gosnell"],
            CONTRIBUTORS["steven-winter"],
        ],
        "field_namespace": "pudl",
        "keywords": ["us", "electricity", "open data", "open source"],
        "license_raw": LICENSES["cc-by-4.0"],
        "license_pudl": LICENSES["cc-by-4.0"],
        "email": "pudl@catalyst.coop",
    },
}
"""Data source attributes by PUDL identifier."""