causalscience committed on
Commit
3700111
·
verified ·
1 Parent(s): 9bf8127

Delete Utils

Browse files
utils/__pycache__/helpers.cpython-312.pyc DELETED
Binary file (1.23 kB)
 
utils/__pycache__/plotting.cpython-312.pyc DELETED
Binary file (4.08 kB)
 
utils/helpers.py DELETED
@@ -1,28 +0,0 @@
1
- # causalscience/utils/helpers.py
2
-
3
- import pandas as pd
4
-
5
-
6
- def detect_column_type(df: pd.DataFrame, column: str) -> str:
7
- """
8
- Determine if a column is boolean (binary) or timeseries.
9
-
10
- Args:
11
- df (pd.DataFrame): Input DataFrame.
12
- column (str): Column name to inspect.
13
-
14
- Returns:
15
- 'boolean' if the column has exactly two unique values,
16
- 'timeseries' if all values can be parsed as dates,
17
- None otherwise.
18
- """
19
- unique_vals = df[column].dropna().unique()
20
- if len(unique_vals) == 2:
21
- return 'boolean'
22
- try:
23
- dt = pd.to_datetime(df[column], errors='coerce')
24
- if dt.notna().all():
25
- return 'timeseries'
26
- except Exception:
27
- pass
28
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/plotting.py DELETED
@@ -1,77 +0,0 @@
1
- # causalscience/utils/plotting.py
2
-
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- from io import BytesIO
6
- from PIL import Image
7
-
8
-
9
def _weighted_mean_and_variance(values, weights):
    """Return the weighted mean and weighted population variance of ``values``.

    Yields ``(nan, nan)`` when the weights sum to zero (which includes an
    empty group), because ``np.average`` would otherwise raise
    ``ZeroDivisionError``.
    """
    if weights.sum() == 0:
        return np.nan, np.nan
    mean = np.average(values, weights=weights)
    variance = np.average((values - mean) ** 2, weights=weights)
    return mean, variance


def calculate_standardized_differences(data, vars_list, treatment, weights=None):
    """
    Calculate standardized mean differences for covariate balance.

    For each covariate the (optionally weighted) treated and control means
    are compared and scaled by the pooled standard deviation
    ``sqrt((var_treated + var_control) / 2)``.

    Args:
        data (pd.DataFrame): Dataset including treatment indicator.
        vars_list (list[str]): Numeric covariate column names.
        treatment (str): Treatment indicator column (0/1).
        weights (list or np.array, optional): Weights for each observation,
            aligned positionally with the rows of ``data``. Defaults to
            equal weights.

    Returns:
        pd.DataFrame: Columns ['variable', 'std_diff'], one row per entry of
        ``vars_list`` (the columns are present even when ``vars_list`` is
        empty). ``std_diff`` is NaN when the pooled standard deviation is
        zero or when either group is empty / has zero total weight.
    """
    # Imported locally: this module's top-level imports do not include pandas.
    import pandas as pd

    if weights is None:
        weights = np.ones(len(data))
    else:
        weights = np.asarray(weights)

    # Plain boolean arrays so the same masks index both the DataFrame rows
    # and the positional weights array.
    treated = (data[treatment] == 1).to_numpy()
    control = (data[treatment] == 0).to_numpy()

    # The group weights do not depend on the covariate; slice them once.
    weights_t = weights[treated]
    weights_c = weights[control]

    results = []
    for var in vars_list:
        mean_t, var_t = _weighted_mean_and_variance(data.loc[treated, var], weights_t)
        mean_c, var_c = _weighted_mean_and_variance(data.loc[control, var], weights_c)
        pooled_sd = np.sqrt((var_t + var_c) / 2)
        # A NaN pooled_sd (empty group) compares unequal to 0 and propagates
        # NaN through the division, so both degenerate cases end up as NaN.
        std_diff = (mean_t - mean_c) / pooled_sd if pooled_sd != 0 else np.nan
        results.append({'variable': var, 'std_diff': std_diff})

    # Passing the columns explicitly keeps the documented schema even when
    # there are no rows.
    return pd.DataFrame(results, columns=['variable', 'std_diff'])
41
-
42
-
43
def love_plot(std_diffs_list, labels, threshold=0.1, abs_val=False):
    """
    Generate a Love plot for covariate balance.

    Each DataFrame in ``std_diffs_list`` is drawn as one scatter series
    (standardized difference on the x-axis, variable name on the y-axis),
    with dashed vertical guide lines at ``+threshold`` and ``-threshold``.

    Args:
        std_diffs_list (list[pd.DataFrame]): List of std diff DataFrames,
            each with 'variable' and 'std_diff' columns.
        labels (list[str]): Labels for each dataset, paired positionally
            with ``std_diffs_list``.
        threshold (float, optional): Threshold lines for balance.
        abs_val (bool, optional): Plot absolute std diffs.

    Returns:
        PIL.Image.Image: Love plot opened from an in-memory PNG buffer.
    """
    # Marker shapes are cycled when there are more datasets than markers.
    markers = ['o', 's', '^', 'd']

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for i, (std_df, label) in enumerate(zip(std_diffs_list, labels)):
            values = std_df['std_diff'].abs() if abs_val else std_df['std_diff']
            ax.scatter(values, std_df['variable'], label=label,
                       marker=markers[i % len(markers)], s=100)

        ax.axvline(x=threshold, color='gray', linestyle='--', alpha=0.5)
        ax.axvline(x=-threshold, color='gray', linestyle='--', alpha=0.5)

        ax.set_xlabel('Standardized Mean Difference')
        ax.set_title('Love Plot: Covariate Balance')
        ax.legend()

        buf = BytesIO()
        fig.tight_layout()
        fig.savefig(buf, format='png', bbox_inches='tight')
    finally:
        # pyplot keeps every open figure registered globally; close it even
        # when plotting or saving fails so repeated calls do not leak figures.
        plt.close(fig)

    # Rewind so PIL reads the PNG from its first byte.
    buf.seek(0)
    return Image.open(buf)