"""Metadata and operational constants."""
import datetime
from typing import Callable, Dict, List, Type
import pandas as pd
import sqlalchemy as sa
# Keys are PUDL field types; values are dtype strings understood by pandas.
# "string", "Int64" and "boolean" are pandas' nullable extension dtypes.
FIELD_DTYPES: Dict[str, str] = {
    "string": "string",
    "number": "float64",
    "integer": "Int64",
    "boolean": "boolean",
    "date": "datetime64[ns]",
    "datetime": "datetime64[ns]",
    # Years are stored as timestamps in dataframes, not as plain integers.
    "year": "datetime64[ns]",
}
"""
Pandas data type by PUDL field type (Data Package ``field.type``).

Each value is a dtype name that can be passed to :meth:`pandas.Series.astype`.
"""
# Values are SQLAlchemy type *classes* (not instances). The annotation uses
# the public ``TypeEngine`` base class rather than an internal visitor
# metaclass, which is not a stable part of SQLAlchemy's API.
FIELD_DTYPES_SQL: Dict[str, Type[sa.types.TypeEngine]] = {
    "boolean": sa.Boolean,
    "date": sa.Date,
    "datetime": sa.DateTime,
    "integer": sa.Integer,
    "number": sa.Float,
    "string": sa.Text,
    # Years are stored as plain integers in SQL.
    "year": sa.Integer,
}
"""
SQLAlchemy column types by PUDL field type (Data Package ``field.type``).
"""
# Values are plain Python classes used for field constraint values.
CONSTRAINT_DTYPES: Dict[str, Type] = {
    "string": str,
    "integer": int,
    "year": int,
    "number": float,
    "boolean": bool,
    "date": datetime.date,
    "datetime": datetime.datetime,
}
"""
Python types for field constraints by PUDL field type (Data Package ``field.type``).
"""
# Each license record carries a "name", a "title" and a "path" (URL).
LICENSES: Dict[str, Dict[str, str]] = {
    "cc-by-4.0": {
        "name": "CC-BY-4.0",
        "title": "Creative Commons Attribution 4.0",
        "path": "https://creativecommons.org/licenses/by/4.0",
    },
    "us-govt": {
        "name": "other-pd",
        "title": "U.S. Government Works",
        "path": "https://www.usa.gov/government-works",
    },
}
"""
License attributes by PUDL identifier.

Every entry provides the keys ``name``, ``title`` and ``path``.
"""
# Each source record carries a "title" and a "path" (URL); the "pudl" entry
# additionally carries a contact "email".
SOURCES: Dict[str, Dict[str, str]] = {
    "eia860": {
        "title": "EIA Form 860",
        "path": "https://www.eia.gov/electricity/data/eia860",
    },
    "eia861": {
        "title": "EIA Form 861: Annual Electric Power Industry Report",
        "path": "https://www.eia.gov/electricity/data/eia861",
    },
    "eia923": {
        "title": "EIA Form 923",
        "path": "https://www.eia.gov/electricity/data/eia923",
    },
    "eiawater": {
        "title": "EIA Thermoelectric cooling water data",
        "path": "https://www.eia.gov/electricity/data/water",
    },
    "epacems": {
        "title": "EPA Air Markets Program Data: Hourly Continuous Emission Monitoring System(CEMS)",
        "path": "https://ampd.epa.gov/ampd",
    },
    "ferc1": {
        "title": "FERC Form 1: Electric Utility Annual Report",
        "path": "https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual",
    },
    "ferc714": {
        "title": "FERC Form 714: Annual Electric Balancing Authority Area and Planning Area Report",
        "path": "https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-no-714-annual-electric",
    },
    "ferceqr": {
        "title": "FERC Form 920: Electric Quarterly Report (EQR)",
        "path": "https://www.ferc.gov/industries-data/electric/power-sales-and-markets/electric-quarterly-reports-eqr",
    },
    "msha": {
        "title": "Mine Safety and Health Administration (MSHA)",
        "path": "https://arlweb.msha.gov/OpenGovernmentData/OGIMSHA.asp",
    },
    "phmsa": {
        "title": "Pipelines and Hazardous Materials Safety Administration (PHMSA)",
        "path": "https://www.phmsa.dot.gov/data-and-statistics/pipeline/data-and-statistics-overview",
    },
    "pudl": {
        "title": "The Public Utility Data Liberation (PUDL) Project",
        "path": "https://catalyst.coop/pudl",
        "email": "pudl@catalyst.coop",
    },
}
"""
Source attributes by PUDL identifier.

Every entry provides a ``title`` and a ``path``; ``email`` is optional.
"""
# Each contributor record carries a "title", "role" and "organization";
# "email" and "path" (URL) are present only for some contributors.
CONTRIBUTORS: Dict[str, Dict[str, str]] = {
    "catalyst-cooperative": {
        "title": "Catalyst Cooperative",
        "email": "pudl@catalyst.coop",
        "path": "https://catalyst.coop",
        "role": "publisher",
        "organization": "Catalyst Cooperative",
    },
    "zane-selvans": {
        "title": "Zane Selvans",
        "email": "zane.selvans@catalyst.coop",
        "path": "https://amateurearthling.org",
        "role": "wrangler",
        "organization": "Catalyst Cooperative",
    },
    "christina-gosnell": {
        "title": "Christina Gosnell",
        "email": "christina.gosnell@catalyst.coop",
        "role": "contributor",
        "organization": "Catalyst Cooperative",
    },
    "steven-winter": {
        "title": "Steven Winter",
        "email": "steven.winter@catalyst.coop",
        "role": "contributor",
        "organization": "Catalyst Cooperative",
    },
    "alana-wilson": {
        "title": "Alana Wilson",
        "email": "alana.wilson@catalyst.coop",
        "role": "contributor",
        "organization": "Catalyst Cooperative",
    },
    "karl-dunkle-werner": {
        "title": "Karl Dunkle Werner",
        "email": "karldw@berkeley.edu",
        "path": "https://karldw.org",
        "role": "contributor",
        "organization": "UC Berkeley",
    },
    "greg-schivley": {
        "title": "Greg Schivley",
        "path": "https://gschivley.github.io",
        "role": "contributor",
        "organization": "Carbon Impact Consulting",
    },
}
"""
Contributor attributes by PUDL identifier.

Every entry provides ``title``, ``role`` and ``organization``; ``email`` and
``path`` are optional.
"""
# Keys are source identifiers; each value lists contributor identifiers in
# the order they should be credited.
CONTRIBUTORS_BY_SOURCE: Dict[str, List[str]] = {
    "pudl": [
        "catalyst-cooperative",
        "zane-selvans",
        "christina-gosnell",
        "steven-winter",
        "alana-wilson",
        "karl-dunkle-werner",
    ],
    "eia923": [
        "catalyst-cooperative",
        "zane-selvans",
        "christina-gosnell",
        "steven-winter",
    ],
    "eia860": [
        "catalyst-cooperative",
        "zane-selvans",
        "christina-gosnell",
        "steven-winter",
        "alana-wilson",
    ],
    "ferc1": [
        "catalyst-cooperative",
        "zane-selvans",
        "christina-gosnell",
        "steven-winter",
        "alana-wilson",
    ],
    "epacems": [
        "catalyst-cooperative",
        "karl-dunkle-werner",
        "zane-selvans",
    ],
}
"""
Contributors (PUDL identifiers) by source (PUDL identifier).
"""
# Keys are source identifiers; each value is a list of search keywords.
# Every keyword appears at most once per source.
KEYWORDS_BY_SOURCE: Dict[str, List[str]] = {
    "pudl": ["us", "electricity"],
    "eia860": [
        "electricity",
        "electric",
        "boiler",
        "generator",
        "plant",
        "utility",
        "fuel",
        "coal",
        "natural gas",
        "prime mover",
        "eia860",
        "retirement",
        "capacity",
        "planned",
        "proposed",
        "energy",
        "hydro",
        "solar",
        "wind",
        "nuclear",
        "form 860",
        "eia",
        "annual",
        "gas",
        "ownership",
        "steam",
        "turbine",
        "combustion",
        "combined cycle",
        "energy information administration",
    ],
    "eia923": [
        "fuel",
        "boiler",
        "generator",
        "plant",
        "utility",
        "cost",
        "price",
        "natural gas",
        "coal",
        "eia923",
        "energy",
        "electricity",
        "form 923",
        "receipts",
        "generation",
        "net generation",
        "monthly",
        "annual",
        "gas",
        "fuel consumption",
        "MWh",
        "energy information administration",
        "eia",
        "mercury",
        "sulfur",
        "ash",
        "lignite",
        "bituminous",
        "subbituminous",
        "heat content",
    ],
    "epacems": [
        "epa",
        "us",
        "emissions",
        "pollution",
        "ghg",
        "so2",
        "co2",
        "sox",
        "nox",
        "load",
        "utility",
        "electricity",
        "plant",
        "generator",
        "unit",
        "generation",
        "capacity",
        "output",
        "power",
        "heat content",
        "mmbtu",
        "steam",
        "cems",
        "continuous emissions monitoring system",
        "hourly",
        "environmental protection agency",
        "ampd",
        "air markets program data",
    ],
    "ferc1": [
        "electricity",
        "electric",
        "utility",
        "plant",
        "steam",
        "generation",
        "cost",
        "expense",
        "price",
        "heat content",
        "ferc",
        "form 1",
        "federal energy regulatory commission",
        "capital",
        "accounting",
        "depreciation",
        "finance",
        "plant in service",
        "hydro",
        "coal",
        "natural gas",
        "gas",
        "opex",
        "capex",
        "accounts",
        "investment",
        "capacity",
    ],
}
"""
Keywords by source (PUDL identifier).

Keywords are unique within each source's list.
"""
# Each function takes a datetime64 Series and returns a Series of the same
# length holding the start of the enclosing period.
#
# Year and month go through a Period round-trip, and date uses normalize(),
# because casting with ``astype("datetime64[Y]")`` (or ``[M]`` / ``[D]``) is
# rejected by pandas >= 2.0, which only supports s/ms/us/ns resolutions.
PERIODS: Dict[str, Callable[[pd.Series], pd.Series]] = {
    "year": lambda x: x.dt.to_period("Y").dt.to_timestamp(),
    # Element-wise: roll each timestamp back to the first day of its calendar
    # quarter (quarters starting in January, April, July and October).
    "quarter": lambda x: x.apply(
        pd.tseries.offsets.QuarterBegin(startingMonth=1).rollback
    ),
    "month": lambda x: x.dt.to_period("M").dt.to_timestamp(),
    "date": lambda x: x.dt.normalize(),
}
"""
Functions converting datetimes to period start times, by time period.

Keys are ``year``, ``quarter``, ``month`` and ``date``. Each value accepts a
:class:`pandas.Series` of datetimes and returns a :class:`pandas.Series`.
"""