File size: 2,557 Bytes
ab66d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fd14e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# utils/do_calculus.py
import pandas as pd
import numpy as np
import networkx as nx

class DoCalculus:
    """Simulate ``do(X=x)`` interventions on tabular data using a causal graph.

    The propagation is a deliberately simplified placeholder: every measured
    variable downstream of the intervention is shifted by a fixed linear
    contribution from each of its measured parents plus Gaussian noise. No
    structural equations are learned from the data.
    """

    def __init__(self, graph):
        """Store the causal graph.

        Args:
            graph: A NetworkX directed graph whose nodes are column names of
                the data passed to :meth:`intervene`, or ``None`` to disable
                propagation (only the direct intervention is then applied).
        """
        self.graph = graph

    def intervene(
        self,
        data: pd.DataFrame,
        intervention_var,
        intervention_value,
        *,
        coeff: float = 0.5,
        noise_std: float = 0.1,
        rng=None,
    ) -> pd.DataFrame:
        """Simulate a ``do(intervention_var = intervention_value)`` intervention.

        The input frame is never modified; a copy is returned. The intervened
        column is overwritten with ``intervention_value`` and the change is
        then pushed, in topological order, to the descendants of
        ``intervention_var`` in the graph. Variables that are not downstream
        of the intervention keep their observed values.

        This is a simplified implementation, not a full do-calculus: each
        affected node receives ``coeff * parent`` from every numeric parent
        column plus zero-mean Gaussian noise.

        Args:
            data: Observed data, one column per variable.
            intervention_var: Column (and graph node) to intervene on. The
                column is created if it does not exist yet.
            intervention_value: Value assigned to every row of that column.
            coeff: Linear weight applied to each parent's contribution.
            noise_std: Standard deviation of the Gaussian noise added to
                every affected node.
            rng: Optional object exposing ``normal(loc, scale, size)`` such
                as ``numpy.random.Generator``. When ``None`` the global
                ``numpy.random`` state is used, so ``np.random.seed`` keeps
                working for callers that rely on it.

        Returns:
            A new DataFrame containing the intervened data. If the graph is
            missing, cyclic, or otherwise unusable, only the direct
            intervention is applied.
        """
        intervened_data = data.copy()

        # Direct intervention: every row gets the forced value.
        intervened_data[intervention_var] = intervention_value

        sampler = np.random if rng is None else rng
        n_rows = len(intervened_data)

        # The order is fully validated and materialized before any column is
        # touched, so a bad graph can never leave the frame half-propagated.
        for node in self._downstream_order(intervention_var):
            # Graph nodes without a numeric column cannot be updated; skip
            # them instead of aborting the remaining propagation.
            if not self._is_numeric_column(intervened_data, node):
                continue

            combined_effect = np.zeros(n_rows)
            for parent in self.graph.predecessors(node):
                if self._is_numeric_column(intervened_data, parent):
                    parent_values = intervened_data[parent].to_numpy(dtype=float)
                    combined_effect += parent_values * coeff

            # Small random noise to simulate uncertainty in the mechanism.
            noise = sampler.normal(0, noise_std, n_rows)
            intervened_data[node] = intervened_data[node] + combined_effect + noise

        return intervened_data

    def _downstream_order(self, intervention_var):
        """Return the descendants of ``intervention_var`` in topological order.

        Returns an empty list (meaning: direct intervention only) when there
        is no graph, the graph is not a DAG, the variable is not a node of
        the graph, or the graph object cannot be queried.
        """
        graph = self.graph
        if graph is None:
            return []

        try:
            if not nx.is_directed_acyclic_graph(graph):
                print(
                    "Warning: Graph is not a DAG. Skipping propagation; "
                    "only the direct intervention is applied."
                )
                return []
            if intervention_var not in graph:
                return []
            affected = nx.descendants(graph, intervention_var)
            # Materialize here: topological_sort is lazy and would otherwise
            # raise in the middle of the caller's loop.
            return [n for n in nx.topological_sort(graph) if n in affected]
        except (nx.NetworkXException, AttributeError, TypeError) as e:
            print(f"Could not perform full propagation due to graph issues or simplification: {e}")
            return []

    @staticmethod
    def _is_numeric_column(frame, name):
        """Return True when ``name`` is a column of ``frame`` with numeric dtype."""
        return name in frame.columns and pd.api.types.is_numeric_dtype(frame[name])