thov committed on
Commit
3ff951d
1 Parent(s): c792745

add threshold on correlation values

Browse files
Files changed (1) hide show
  1. utils.py +4 -4
utils.py CHANGED
@@ -11,13 +11,13 @@ def clean_csv_file(csv_file):
11
  return df
12
 
13
 
14
- def build_graph(csv_file):
15
  features = clean_csv_file(csv_file)
16
  links = features.corr(method='kendall').fillna(0).stack().reset_index()
17
  links.columns = ['var1', 'var2', 'value']
18
 
19
  # Keep only correlation over a threshold
20
- links_filtered = links.loc[(links['value'] > 0.1) & (links['var1'] != links['var2'])]
21
  iter_values = iter(set(links_filtered['value']))
22
 
23
  G = nx.from_pandas_edgelist(links_filtered, 'var1', 'var2')
@@ -46,9 +46,9 @@ class MplColorHelper:
46
  return f"rgb({r},{g},{b})"
47
 
48
 
49
- def display_graph(csv_file):
50
 
51
- G = build_graph(csv_file)
52
 
53
  CM_NAME = "Wistia"
54
 
 
11
  return df
12
 
13
 
14
+ def build_graph(csv_file, threshold):
15
  features = clean_csv_file(csv_file)
16
  links = features.corr(method='kendall').fillna(0).stack().reset_index()
17
  links.columns = ['var1', 'var2', 'value']
18
 
19
  # Keep only correlation over a threshold
20
+ links_filtered = links.loc[(links['value'] > threshold) & (links['var1'] != links['var2'])]
21
  iter_values = iter(set(links_filtered['value']))
22
 
23
  G = nx.from_pandas_edgelist(links_filtered, 'var1', 'var2')
 
46
  return f"rgb({r},{g},{b})"
47
 
48
 
49
+ def display_graph(csv_file, threshold):
50
 
51
+ G = build_graph(csv_file, threshold=threshold)
52
 
53
  CM_NAME = "Wistia"
54