"""Provides tooling for developing/tracking ml models within PUDL.The main interface from this module is the :func:`pudl_model` decorator, whichis meant to be applied to a dagster `graph`. This decorator will handle finding allconfiguration for a model/passing configuration to dagster, creating an:class:`ExperimentTracker` for the model, and ultimately will return a `graph_asset`from the model.There are a few different ways to provide configuration for a PUDL model. First, configuration will come from default values for any dagster `Config`'s which are associatedwith `op`'s which make up the model `graph`. For more info on dagster configuration,see https://docs.dagster.io/concepts/configuration/config-schema. The next way toprovide configuration is through the yaml file: `pudl.package_data.settings.pudl_models.yml`.Any configuration in this file should be follow dagster's config-schema formatting,see the `ferc_to_ferc` entry as an example. Configuration provided this way willoverride any default values. The final way to provide configuration is through thedagster UI. To provide configuration this way, click `Open Launchpad` in the UI, andvalues can be edited here. This configuration will override both default values andyaml configuration, but will only be used for a single run."""importimportlibimportyamlfromdagsterimport(AssetIn,AssetsDefinition,GraphDefinition,OpDefinition,graph_asset,)importpudlfrom.importexperiment_tracking
def get_yml_config(experiment_name: str) -> dict:
    """Load model configuration from yaml file.

    Reads ``pudl_models.yml`` from the ``pudl.package_data.settings`` package and
    extracts the top-level entry named ``experiment_name``.

    Args:
        experiment_name: Top-level key to look up in the yaml file.

    Returns:
        A single-item dictionary mapping ``experiment_name`` to the configuration
        found under that key.

    Raises:
        RuntimeError: If the yaml file has no entry (or an empty entry) for
            ``experiment_name``. This includes the case of an empty yaml file.
    """
    # ``import importlib`` alone does not guarantee that the ``resources``
    # submodule has been loaded, so import it explicitly before use.
    import importlib.resources

    config_file = (
        importlib.resources.files("pudl.package_data.settings") / "pudl_models.yml"
    )
    # Use a context manager so the file handle is closed deterministically.
    with config_file.open("r") as yml_file:
        # ``safe_load`` returns ``None`` for an empty document; fall back to an
        # empty mapping so the missing-entry error below is raised instead of an
        # ``AttributeError``.
        config = yaml.safe_load(yml_file) or {}

    if not (model_config := config.get(experiment_name)):
        raise RuntimeError(f"No {experiment_name} entry in {config_file}")

    return {experiment_name: model_config}
def get_default_config(model_graph: GraphDefinition) -> dict:
    """Get default config values for model.

    Walks ``model_graph`` recursively and collects the default config of every
    node, then adds an entry holding the dumped default
    ``ExperimentTrackerConfig``.

    Args:
        model_graph: The graph whose nodes are inspected for default config.

    Returns:
        A dictionary with two keys: ``model_graph.name`` (the nested defaults of
        the graph) and ``f"{model_graph.name}_tracker"`` (the tracker defaults
        wrapped under ``"config"``).
    """

    def _collect_defaults(node: OpDefinition | GraphDefinition):
        # Graphs have no config of their own here; recurse into their children
        # and nest the results under "ops", keyed by child name.
        if isinstance(node, GraphDefinition):
            child_defaults = {}
            for child in node.node_defs:
                child_defaults[child.name] = _collect_defaults(child)
            return {"ops": child_defaults}

        # Anything else is treated as an op: use the schema default when one is
        # provided, otherwise record ``None``.
        if node.config_schema.default_provided:
            return {"config": node.config_schema.default_value}
        return {"config": None}

    graph_name = model_graph.name
    defaults = {graph_name: _collect_defaults(model_graph)}

    tracker_defaults = experiment_tracking.ExperimentTrackerConfig().model_dump()
    defaults[f"{model_graph.name}_tracker"] = {"config": tracker_defaults}
    return defaults
def pudl_model(asset_name: str, config_from_yaml: bool = False) -> AssetsDefinition:
    """Decorator for an ML model that will handle providing configuration to dagster.

    Args:
        asset_name: Name given to the resulting ``graph_asset`` and used as the key
            under which the model configuration is stored in
            ``MODEL_CONFIGURATION``.
        config_from_yaml: If True, the entry returned by :func:`get_yml_config` for
            the graph's name is merged over the default configuration.

    Returns:
        A decorator which takes the model ``graph`` and returns the object
        produced by ``graph_asset``. NOTE(review): the annotation names the
        decorator's eventual result rather than the decorator itself.
    """

    def _decorator(model_graph: GraphDefinition):
        # Start from defaults; the yaml entry (if requested) replaces matching
        # top-level keys in place.
        resolved_config = get_default_config(model_graph)
        if config_from_yaml:
            resolved_config.update(get_yml_config(model_graph.name))

        MODEL_CONFIGURATION[asset_name] = {"ops": resolved_config}

        # Inputs should come from assets except experiment tracker
        ins = {}
        for input_name in model_graph.input_dict:
            if input_name == "experiment_tracker":
                continue
            ins[input_name] = AssetIn(input_name)

        @graph_asset(name=asset_name, ins=ins)
        def model_asset(**kwargs):
            # Build the tracker from the factory, then hand it to the model graph
            # as its first argument alongside the asset inputs.
            tracker_builder = experiment_tracking.experiment_tracker_factory(
                experiment_name=model_graph.name,
                model_config=resolved_config,
            )
            experiment_tracker = tracker_builder()
            return model_graph(experiment_tracker, **kwargs)

        return model_asset

    return _decorator