Source code for dowhy.causal_estimators.econml

import inspect
from importlib import import_module

import econml
import numpy as np
import pandas as pd

from dowhy.causal_estimator import CausalEstimate, CausalEstimator
from dowhy.utils.api import parse_state


[docs]class Econml(CausalEstimator): """Wrapper class for estimators from the EconML library. For a list of standard args and kwargs, see documentation for :class:`~dowhy.causal_estimator.CausalEstimator`. Supports additional parameters as listed below. For init and fit parameters of each estimator, refer to the EconML docs. """ def __init__(self, *args, econml_methodname, **kwargs): """ :param econml_methodname: Fully qualified name of econml estimator class. For example, 'econml.dml.DML' """ # Required to ensure that self.method_params contains all the # parameters to create an object of this class args_dict = {k: v for k, v in locals().items() if k not in type(self)._STD_INIT_ARGS} args_dict.update(kwargs) super().__init__(*args, **args_dict) self._econml_methodname = econml_methodname self.logger.info("INFO: Using EconML Estimator") self.identifier_method = self._target_estimand.identifier_method self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy() # For metalearners only--issue a warning if w contains variables not in x (module_name, _, class_name) = self._econml_methodname.rpartition(".") if module_name.endswith("metalearners"): effect_modifier_names = [] if self._effect_modifier_names is not None: effect_modifier_names = self._effect_modifier_names.copy() w_diff_x = [w for w in self._observed_common_causes_names if w not in effect_modifier_names] if len(w_diff_x) > 0: self.logger.warn( "Concatenating common_causes and effect_modifiers and providing a single list of variables to metalearner estimator method, " + class_name + ". EconML metalearners accept a single X argument." ) effect_modifier_names.extend(w_diff_x) # Override the effect_modifiers set in CausalEstimator.__init__() # Also only update self._effect_modifiers, and create a copy of self._effect_modifier_names # the latter can be used by other estimator methods later self._effect_modifiers = self._data[effect_modifier_names] self._effect_modifiers = pd.get_dummies(self._effect_modifiers, drop_first=True) self._effect_modifier_names = effect_modifier_names self.logger.debug("Effect modifiers: " + ",".join(effect_modifier_names)) if self._observed_common_causes_names: self._observed_common_causes = self._data[self._observed_common_causes_names] self._observed_common_causes = pd.get_dummies(self._observed_common_causes, drop_first=True) else: self._observed_common_causes = None self.logger.debug("Back-door variables used:" + ",".join(self._observed_common_causes_names)) # Instrumental variables names, if present # choosing the instrumental variable to use if getattr(self, "iv_instrument_name", None) is None: self.estimating_instrument_names = self._target_estimand.instrumental_variables else: self.estimating_instrument_names = parse_state(self.iv_instrument_name) if self.estimating_instrument_names: self._estimating_instruments = self._data[self.estimating_instrument_names] self._estimating_instruments = pd.get_dummies(self._estimating_instruments, drop_first=True) else: self._estimating_instruments = None self.estimator = None self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand) self.logger.info(self.symbolic_estimator) def _get_econml_class_object(self, module_method_name, *args, **kwargs): # from https://www.bnmetrics.com/blog/factory-pattern-in-python3-simple-version try: (module_name, _, class_name) = module_method_name.rpartition(".") estimator_module = import_module(module_name) estimator_class = getattr(estimator_module, class_name) except (AttributeError, AssertionError, ImportError): raise ImportError( "Error loading {}.{}. Double-check the method name and ensure that all econml dependencies are installed.".format( module_name, class_name ) ) return estimator_class def _estimate_effect(self): n_samples = self._treatment.shape[0] X = None # Effect modifiers W = None # common causes/ confounders Z = None # Instruments Y = self._outcome T = self._treatment if self._effect_modifiers is not None: X = self._effect_modifiers if self._observed_common_causes_names: W = self._observed_common_causes if self.estimating_instrument_names: Z = self._estimating_instruments named_data_args = {"Y": Y, "T": T, "X": X, "W": W, "Z": Z} if self.estimator is None: estimator_class = self._get_econml_class_object(self._econml_methodname) self.estimator = estimator_class(**self.method_params["init_params"]) # Calling the econml estimator's fit method estimator_argspec = inspect.getfullargspec(inspect.unwrap(self.estimator.fit)) # As of v0.9, econml has some kewyord only arguments estimator_named_args = estimator_argspec.args + estimator_argspec.kwonlyargs estimator_data_args = { arg: named_data_args[arg] for arg in named_data_args.keys() if arg in estimator_named_args } if self.method_params["fit_params"] is not False: self.estimator.fit(**estimator_data_args, **self.method_params["fit_params"]) X_test = X n_target_units = n_samples if X is not None: if type(self._target_units) is pd.DataFrame: X_test = self._target_units elif callable(self._target_units): filtered_rows = self._data.where(self._target_units) boolean_criterion = np.array(filtered_rows.notnull().iloc[:, 0]) X_test = X[boolean_criterion] n_target_units = X_test.shape[0] # Changing shape to a list for a singleton value if type(self._control_value) is not list: self._control_value = [self._control_value] if type(self._treatment_value) is not list: self._treatment_value = [self._treatment_value] T0_test = np.repeat([self._control_value], n_target_units, axis=0) T1_test = np.repeat([self._treatment_value], n_target_units, axis=0) est = self.estimator.effect(X_test, T0=T0_test, T1=T1_test) ate = np.mean(est) self.effect_intervals = None if self._confidence_intervals: self.effect_intervals = self.estimator.effect_interval( X_test, T0=T0_test, T1=T1_test, alpha=1 - self.confidence_level ) estimate = CausalEstimate( estimate=ate, control_value=self._control_value, treatment_value=self._treatment_value, target_estimand=self._target_estimand, realized_estimand_expr=self.symbolic_estimator, cate_estimates=est, effect_intervals=self.effect_intervals, _estimator_object=self.estimator, ) return estimate def _estimate_confidence_intervals(self, confidence_level=None, method=None): """Returns None if the confidence interval has not been calculated.""" return self.effect_intervals def _do(self, x): raise NotImplementedError
[docs] def construct_symbolic_estimator(self, estimand): expr = "b: " + ", ".join(estimand.outcome_variable) + "~" # TODO -- fix: we are actually conditioning on positive treatment (d=1) (module_name, _, class_name) = self._econml_methodname.rpartition(".") if module_name.endswith("metalearners"): var_list = estimand.treatment_variable + self._effect_modifier_names expr += "+".join(var_list) else: var_list = estimand.treatment_variable + self._observed_common_causes_names expr += "+".join(var_list) expr += " | " + ",".join(self._effect_modifier_names) return expr
[docs] def shap_values(self, df: pd.DataFrame, *args, **kwargs): return self.estimator.shap_values(df[self._effect_modifier_names].values, *args, **kwargs)
[docs] def effect(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray: return self.estimator.effect(df[self._effect_modifier_names].values, *args, **kwargs)
[docs] def effect_inference(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray: return self.estimator.effect_inference(df[self._effect_modifier_names].values, *args, **kwargs)