Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -8,12 +8,16 @@ import networkx as nx
|
|
8 |
|
9 |
def clean_csv_file(csv_file):
|
10 |
df = pd.read_csv(csv_file)
|
|
|
|
|
|
|
|
|
11 |
return df
|
12 |
|
13 |
|
14 |
-
def build_graph(csv_file, threshold):
|
15 |
features = clean_csv_file(csv_file)
|
16 |
-
links = features.corr(method=
|
17 |
links.columns = ['var1', 'var2', 'value']
|
18 |
|
19 |
# Keep only correlation over a threshold
|
@@ -46,9 +50,9 @@ class MplColorHelper:
|
|
46 |
return f"rgb({r},{g},{b})"
|
47 |
|
48 |
|
49 |
-
def display_graph(csv_file, threshold):
|
50 |
|
51 |
-
G = build_graph(csv_file, threshold=threshold)
|
52 |
|
53 |
CM_NAME = "Wistia"
|
54 |
|
@@ -59,6 +63,7 @@ def display_graph(csv_file, threshold):
|
|
59 |
# get rgb string for each node
|
60 |
for u, v, data in G.edges(data=True):
|
61 |
data['color'] = edge_colors.get_rgb_str(data['corr_value'])
|
|
|
62 |
disp = gv.d3(
|
63 |
G,
|
64 |
# graph specs
|
|
|
8 |
|
9 |
def clean_csv_file(csv_file):
|
10 |
df = pd.read_csv(csv_file)
|
11 |
+
|
12 |
+
df.dropna(inplace=True)
|
13 |
+
df.drop_duplicates(inplace=True)
|
14 |
+
|
15 |
return df
|
16 |
|
17 |
|
18 |
+
def build_graph(csv_file, *, threshold, corr_type):
|
19 |
features = clean_csv_file(csv_file)
|
20 |
+
links = features.corr(method=corr_type).fillna(0).stack().reset_index()
|
21 |
links.columns = ['var1', 'var2', 'value']
|
22 |
|
23 |
# Keep only correlation over a threshold
|
|
|
50 |
return f"rgb({r},{g},{b})"
|
51 |
|
52 |
|
53 |
+
def display_graph(csv_file, *, threshold, corr_type):
|
54 |
|
55 |
+
G = build_graph(csv_file, threshold=threshold, corr_type=corr_type)
|
56 |
|
57 |
CM_NAME = "Wistia"
|
58 |
|
|
|
63 |
# get rgb string for each node
|
64 |
for u, v, data in G.edges(data=True):
|
65 |
data['color'] = edge_colors.get_rgb_str(data['corr_value'])
|
66 |
+
|
67 |
disp = gv.d3(
|
68 |
G,
|
69 |
# graph specs
|