Source code for dowhy.causal_refuter

import logging
import numpy as np
import scipy.stats as st
import random

from dowhy.utils.api import parse_state

class CausalRefuter:
    """Base class for different refutation methods.

    Subclasses implement specific refutation methods by overriding
    :meth:`refute_estimate`. This base class stores the data, estimand and
    estimate, and provides helpers for choosing variables and for testing the
    statistical significance of an estimate against refuter simulations.
    """

    # Default value for the number of simulations to be conducted
    DEFAULT_NUM_SIMULATIONS = 100

    def __init__(self, data, identified_estimand, estimate, **kwargs):
        """Store the inputs of the refutation and collect the variables of interest.

        :param data: the dataset; stored as-is on the instance
        :param identified_estimand: object exposing ``treatment_variable``,
            ``outcome_variable``, ``instrumental_variables`` and
            ``get_backdoor_variables()``
        :param estimate: object exposing ``params['effect_modifiers']``
        :param kwargs: if ``random_seed`` is present it is stored and used to
            seed NumPy's global random number generator
        """
        self._data = data
        self._target_estimand = identified_estimand
        self._estimate = estimate
        self._treatment_name = self._target_estimand.treatment_variable
        self._outcome_name = self._target_estimand.outcome_variable
        self._random_seed = None

        if "random_seed" in kwargs:
            self._random_seed = kwargs['random_seed']
            # NOTE(review): only NumPy's global generator is seeded here; the
            # stdlib ``random`` module used by choose_variables() is not.
            np.random.seed(self._random_seed)

        self.logger = logging.getLogger(__name__)

        # Always define the attribute so that a failure below (which is only
        # logged) does not resurface later as an unrelated AttributeError.
        self._variables_of_interest = []

        # Concatenate the confounders, instruments and effect modifiers
        try:
            self._variables_of_interest = (
                self._target_estimand.get_backdoor_variables()
                + self._target_estimand.instrumental_variables
                + self._estimate.params['effect_modifiers']
            )
        except AttributeError as attr_error:
            self.logger.error(attr_error)

    def choose_variables(self, required_variables):
        """Choose the variables (confounders, instruments, effect modifiers) to modify.

        :param required_variables: bool, int or list
            * ``False``: no variable is chosen and ``None`` is returned.
            * ``True``: every variable of interest is returned.
            * ``int``: that many variables are drawn at random, without replacement.
            * ``list`` of names: either all plain names (select exactly these) or
              all names prefixed with ``'-'`` (select everything except these).
              An empty list counts as "deselect nothing" and yields every variable.

        :returns: a list of variable names, or ``None`` when ``required_variables`` is ``False``
        :raises ValueError: if more variables are requested than exist, if the count
            is negative, if select and deselect names are mixed, or if a name is not
            one of the variables of interest
        :raises TypeError: if ``required_variables`` is not a bool, int or list
        """
        if required_variables is False:
            self.logger.info("No variables required: no confounders, instrumental variables or effect modifiers will be selected.")
            return None

        if required_variables is True:
            self.logger.info("All variables required: Running bootstrap adding noise to confounders, instrumental variables and effect modifiers.")
            return self._variables_of_interest

        # bool is handled above, so an int here is a genuine count.
        if isinstance(required_variables, int):
            if required_variables < 0:
                self.logger.error("Negative number of variables passed: {}".format(required_variables))
                raise ValueError("The number of variables in the required_variables must not be negative")

            if len(self._variables_of_interest) < required_variables:
                self.logger.error("Too many variables passed.\n The number of  variables is: {}.\n The number of variables passed: {}".format(
                    len(self._variables_of_interest),
                    required_variables )
                )
                raise ValueError("The number of variables in the required_variables is greater than the number of confounders, instrumental variables and effect modifiers")

            # random.sample draws without replacement and, unlike an in-place
            # shuffle, leaves the order of self._variables_of_interest untouched.
            return random.sample(self._variables_of_interest, required_variables)

        if isinstance(required_variables, list):
            # startswith() copes with empty strings, which indexing [0] would not.
            deselect_flags = [variable.startswith('-') for variable in required_variables]

            # Check if all are select or deselect variables
            if all(deselect_flags):
                invert = True
                required_variables = [variable[1:] for variable in required_variables]
            elif not any(deselect_flags):
                invert = False
            else:
                self.logger.error("{} has both select and delect variables".format(required_variables))
                raise ValueError("It appears that there are some select and deselect variables. Note you can either select or delect variables at a time, but not both")

            # Check if all the required_variables belong to confounders, instrumental variables or effect
            unknown_variables = set(required_variables) - set(self._variables_of_interest)
            if unknown_variables:
                self.logger.error("{} are not confounder, instrumental variable or effect modifier".format( list(unknown_variables) ))
                raise ValueError("At least one of required_variables is not a valid variable name, or it is not a confounder, instrumental variable or effect modifier")

            if not invert:
                return required_variables

            # Keep the original ordering (with duplicates removed) so that the
            # result is reproducible; plain set arithmetic has arbitrary order.
            excluded = set(required_variables)
            return [variable for variable in dict.fromkeys(self._variables_of_interest)
                    if variable not in excluded]

        self.logger.error("Incorrect type: {}. Expected an int,list or bool".format( type(required_variables) ) )
        raise TypeError("Expected int, list or bool. Got an unexpected datatype")

    def test_significance(self, estimate, simulations, test_type='auto',significance_level=0.05):
        """ Tests the statistical significance of the estimate obtained to the simulations produced by a refuter.

        The basis behind using the sample statistics of the refuter when we are in fact testing the estimate,
        is that we would ideally expect them to follow the same distribution.

        For refutation tests (e.g., placebo refuters), consider the null distribution as a distribution of effect
        estimates over multiple simulations with placebo treatment, and compute how likely the true estimate (e.g.,
        zero for placebo test) is under the null. If the probability of the true effect estimate is lower than the
        significance level, then the estimator method fails the test.

        For sensitivity analysis tests (e.g., bootstrap, subset or common cause refuters), the null distribution captures
        the distribution of effect estimates under the "true" dataset (e.g., with an additional confounder or different
        sampling), and we compute the probability of the obtained estimate under this distribution. If the probability is
        lower than the significance level, then the estimator method fails the test.

        Null Hypothesis: The estimate is a part of the distribution
        Alternative Hypothesis: The estimate does not fall in the distribution.

        Both underlying tests are two-sided, matching the hypotheses above.

        :param 'estimate': CausalEstimate
        The estimate obtained from the estimator for the original data.
        :param 'simulations': np.array
        An array containing the result of the refuter for the simulations
        :param 'test_type': string, default 'auto'
        The type of test the user wishes to perform: 'auto', 'bootstrap' or 'normal_test'.
        With 'auto', the bootstrap test is used for 100 or more simulations and the normal test otherwise.
        :param 'significance_level': float, default 0.05
        The significance level for the statistical test

        :returns: significance_dict: Dict
        A Dict containing the p_value and a boolean that indicates if the result is statistically significant
        :raises NotImplementedError: if test_type is not one of the supported values
        """
        if test_type == 'auto':
            num_simulations = len(simulations)
            if num_simulations >= 100: # Bootstrapping
                self.logger.info("Making use of Bootstrap as we have more than 100 examples.\n \
                Note: The greater the number of examples, the more accurate are the confidence estimates")

                # Perform Bootstrap Significance Test with the original estimate and the set of refutations
                p_value = self.perform_bootstrap_test(estimate, simulations)

            else:
                self.logger.warning("We assume a Normal Distribution as the sample has less than 100 examples.\n \
                Note: The underlying distribution may not be Normal. We assume that it approaches normal with the increase in sample size.")

                # Perform Normal Tests of Significance with the original estimate and the set of refutations
                p_value = self.perform_normal_distribution_test(estimate, simulations)

        elif test_type == 'bootstrap':
            self.logger.info("Performing Bootstrap Test with {} samples\n \
            Note: The greater the number of examples, the more accurate are the confidence estimates".format( len(simulations) ) )

            # Perform Bootstrap Significance Test with the original estimate and the set of refutations
            p_value = self.perform_bootstrap_test(estimate, simulations)

        elif test_type == 'normal_test':
            self.logger.info("Performing Normal Test with {} samples\n \
            Note: We assume that the underlying distribution is Normal.".format( len(simulations) ) )

            # Perform Normal Tests of Significance with the original estimate and the set of refutations
            p_value = self.perform_normal_distribution_test(estimate, simulations)

        else:
            raise NotImplementedError(
                "Unknown test_type {!r}. Expected 'auto', 'bootstrap' or 'normal_test'".format(test_type)
            )

        significance_dict = {
                "p_value":p_value,
                "is_statistically_significant": p_value <= significance_level
                }

        return significance_dict

    def perform_bootstrap_test(self, estimate, simulations):
        """Two-sided empirical test of ``estimate.value`` against the simulations.

        The tail on the estimate's side of the median is measured as a fraction
        of the simulations and doubled to give a two-sided p-value, capped at 1.

        :param estimate: object whose ``value`` attribute is the original estimate
        :param simulations: sequence of refuter results; it is not modified
        :returns: the two-sided p-value
        """
        # Get the number of simulations
        num_simulations = len(simulations)
        # Sort a copy so that the caller's array keeps its original order
        sorted_simulations = np.sort(simulations)
        # Obtain the median value (upper median for an even count)
        median_refute_values = sorted_simulations[num_simulations // 2]

        if estimate.value > median_refute_values:
            # np.searchsorted tells us the index if it were a part of the array
            # We select side to be left as we want to find the first value that matches
            estimate_index = np.searchsorted(sorted_simulations, estimate.value, side="left")
            # We subtract from 1 as we are finding the mass in the right tail
            tail_probability = 1 - (estimate_index / num_simulations)
        else:
            # We take the side to be right as we want to find the last index that matches
            estimate_index = np.searchsorted(sorted_simulations, estimate.value, side="right")
            # We get the probability with respect to the left tail.
            tail_probability = estimate_index / num_simulations

        # Double the single-tail mass for a two-sided test; near the median the
        # doubled value can exceed 1, so cap it.
        return min(1.0, 2 * tail_probability)

    def perform_normal_distribution_test(self, estimate, simulations):
        """Two-sided z-test of ``estimate.value`` against the simulations.

        The simulations are summarised by their mean and (population) standard
        deviation and the estimate is converted to a z-score.

        :param estimate: object whose ``value`` attribute is the original estimate
        :param simulations: sequence of refuter results
        :returns: the two-sided p-value
        """
        # Get the mean for the simulations
        mean_refute_values = np.mean(simulations)
        # Get the standard deviation for the simulations
        std_dev_refute_values = np.std(simulations)

        if std_dev_refute_values == 0:
            # Degenerate distribution: every simulation is identical, so the
            # z-score is undefined. The estimate either coincides with that
            # single value or lies infinitely far from it.
            return 1.0 if estimate.value == mean_refute_values else 0.0

        # Get the Z Score [(val - mean)/ std_dev ]
        z_score = (estimate.value - mean_refute_values) / std_dev_refute_values

        # The survival function of |z| is the mass in the tail on the estimate's
        # side; doubling it gives the two-sided p-value.
        return 2 * st.norm.sf(abs(z_score))

    def refute_estimate(self):
        """Run the refutation; must be implemented by subclasses."""
        raise NotImplementedError


class CausalRefutation:
    """Class for storing the result of a refutation method.

    Holds the original effect, the effect obtained by the refuter and a label
    for the refutation. A significance-test result and the refuter instance
    can be attached afterwards.
    """

    def __init__(self, estimated_effect, new_effect, refutation_type):
        """
        :param estimated_effect: the effect estimated on the original data
        :param new_effect: the effect obtained from the refutation
        :param refutation_type: label for the refutation; printed first by ``__str__``
        """
        self.estimated_effect = estimated_effect
        self.new_effect = new_effect
        self.refutation_type = refutation_type

        # Set later through add_significance_test_results(); None until then.
        self.refutation_result = None
        # Set later through add_refuter(); defined here so that the attribute
        # always exists, even when no refuter has been attached.
        self.refuter = None

    def add_significance_test_results(self, refutation_result):
        """Attach the significance test result; ``__str__`` reads its ``'p_value'`` key."""
        self.refutation_result = refutation_result

    def add_refuter(self, refuter_instance):
        """Attach the refuter; ``interpret`` reads its ``interpret_method`` attribute."""
        self.refuter = refuter_instance

    def interpret(self, method_name=None, **kwargs):
        """Interpret the refutation results.

        :param method_name: Method used (string) or a list of methods. If None, then the default for the specific refuter is used.
        :param kwargs: forwarded to each interpreter's constructor

        :returns: None
        :raises AttributeError: if ``method_name`` is None and no refuter has been attached

        """
        if method_name is None:
            refuter = getattr(self, "refuter", None)
            if refuter is None:
                raise AttributeError(
                    "No refuter is attached to this CausalRefutation; call add_refuter() "
                    "first or pass method_name explicitly"
                )
            method_name = refuter.interpret_method
        method_name_arr = parse_state(method_name)
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import - TODO confirm).
        import dowhy.interpreters as interpreters
        for method in method_name_arr:
            interpreter = interpreters.get_class_object(method)
            interpreter(self, **kwargs).interpret()

    def __str__(self):
        """Return a multi-line summary; the p value line appears only once a result is attached."""
        if self.refutation_result is None:
            return "{0}\nEstimated effect:{1}\nNew effect:{2}\n".format(
                self.refutation_type, self.estimated_effect, self.new_effect
            )
        return "{0}\nEstimated effect:{1}\nNew effect:{2}\np value:{3}\n".format(
            self.refutation_type, self.estimated_effect, self.new_effect, self.refutation_result['p_value']
        )