thov committed on
Commit
775aa8b
1 Parent(s): 843c970

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +9 -4
utils.py CHANGED
@@ -8,12 +8,16 @@ import networkx as nx
8
 
9
  def clean_csv_file(csv_file):
10
  df = pd.read_csv(csv_file)
 
 
 
 
11
  return df
12
 
13
 
14
- def build_graph(csv_file, threshold):
15
  features = clean_csv_file(csv_file)
16
- links = features.corr(method='kendall').fillna(0).stack().reset_index()
17
  links.columns = ['var1', 'var2', 'value']
18
 
19
  # Keep only correlation over a threshold
@@ -46,9 +50,9 @@ class MplColorHelper:
46
  return f"rgb({r},{g},{b})"
47
 
48
 
49
- def display_graph(csv_file, threshold):
50
 
51
- G = build_graph(csv_file, threshold=threshold)
52
 
53
  CM_NAME = "Wistia"
54
 
@@ -59,6 +63,7 @@ def display_graph(csv_file, threshold):
59
  # get rgb string for each node
60
  for u, v, data in G.edges(data=True):
61
  data['color'] = edge_colors.get_rgb_str(data['corr_value'])
 
62
  disp = gv.d3(
63
  G,
64
  # graph specs
 
8
 
9
  def clean_csv_file(csv_file):
10
  df = pd.read_csv(csv_file)
11
+
12
+ df.dropna(inplace=True)
13
+ df.drop_duplicates(inplace=True)
14
+
15
  return df
16
 
17
 
18
+ def build_graph(csv_file, *, threshold, corr_type):
19
  features = clean_csv_file(csv_file)
20
+ links = features.corr(method=corr_type).fillna(0).stack().reset_index()
21
  links.columns = ['var1', 'var2', 'value']
22
 
23
  # Keep only correlation over a threshold
 
50
  return f"rgb({r},{g},{b})"
51
 
52
 
53
+ def display_graph(csv_file, *, threshold, corr_type):
54
 
55
+ G = build_graph(csv_file, threshold=threshold, corr_type=corr_type)
56
 
57
  CM_NAME = "Wistia"
58
 
 
63
  # get rgb string for each node
64
  for u, v, data in G.edges(data=True):
65
  data['color'] = edge_colors.get_rgb_str(data['corr_value'])
66
+
67
  disp = gv.d3(
68
  G,
69
  # graph specs