KGNINJA committed on
Commit
456461d
·
verified ·
1 Parent(s): 652c9a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -7,25 +7,34 @@ import umap
7
 
8
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def analyze_problems(text):
11
 
12
  problems = [p.strip() for p in text.split("\n") if p.strip()]
13
 
14
  embeddings = model.encode(problems)
15
 
16
- # clustering
17
  k = min(5, len(problems))
18
  kmeans = KMeans(n_clusters=k, random_state=0).fit(embeddings)
19
 
20
- clusters = kmeans.labels_
21
-
22
- # dimensionality reduction for visualization
23
  reducer = umap.UMAP()
24
  coords = reducer.fit_transform(embeddings)
25
 
26
  df = pd.DataFrame({
27
  "problem": problems,
28
- "cluster": clusters,
29
  "x": coords[:,0],
30
  "y": coords[:,1]
31
  })
@@ -41,28 +50,24 @@ def analyze_problems(text):
41
 
42
  cluster_summary = df.groupby("cluster")["problem"].apply(list).to_dict()
43
 
44
- summary_text = ""
45
 
46
  for c, items in cluster_summary.items():
47
- summary_text += f"\nCluster {c}\n"
48
  for i in items:
49
- summary_text += f"- {i}\n"
50
-
51
- return summary_text, fig
52
 
 
53
 
54
- with open("sample_problems.txt") as f:
55
- default_text = f.read()
56
 
57
  demo = gr.Interface(
58
  fn=analyze_problems,
59
- inputs=gr.Textbox(value=default_text, lines=15, label="Problem Signals"),
60
  outputs=[
61
  gr.Textbox(label="Problem Clusters"),
62
  gr.Plot(label="Problem Market Map")
63
  ],
64
  title="Problem Discovery Engine Demo",
65
- description="Detect clusters of real-world problems and visualize a Problem Market Map."
66
  )
67
 
68
  demo.launch()
 
7
 
8
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
9
 
10
+ default_problems = """
11
+ I manually rename files every week
12
+ I convert PDFs to Excel
13
+ I copy data between spreadsheets
14
+ I send weekly reports manually
15
+ I merge CSV files daily
16
+ I manually download invoices
17
+ I extract tables from PDFs
18
+ I clean messy Excel sheets
19
+ I manually schedule social posts
20
+ I track expenses in spreadsheets
21
+ """
22
+
23
  def analyze_problems(text):
24
 
25
  problems = [p.strip() for p in text.split("\n") if p.strip()]
26
 
27
  embeddings = model.encode(problems)
28
 
 
29
  k = min(5, len(problems))
30
  kmeans = KMeans(n_clusters=k, random_state=0).fit(embeddings)
31
 
 
 
 
32
  reducer = umap.UMAP()
33
  coords = reducer.fit_transform(embeddings)
34
 
35
  df = pd.DataFrame({
36
  "problem": problems,
37
+ "cluster": kmeans.labels_,
38
  "x": coords[:,0],
39
  "y": coords[:,1]
40
  })
 
50
 
51
  cluster_summary = df.groupby("cluster")["problem"].apply(list).to_dict()
52
 
53
+ summary = ""
54
 
55
  for c, items in cluster_summary.items():
56
+ summary += f"\nCluster {c}\n"
57
  for i in items:
58
+ summary += f"- {i}\n"
 
 
59
 
60
+ return summary, fig
61
 
 
 
62
 
63
  demo = gr.Interface(
64
  fn=analyze_problems,
65
+ inputs=gr.Textbox(value=default_problems, lines=15, label="Problem Signals"),
66
  outputs=[
67
  gr.Textbox(label="Problem Clusters"),
68
  gr.Plot(label="Problem Market Map")
69
  ],
70
  title="Problem Discovery Engine Demo",
 
71
  )
72
 
73
  demo.launch()