Emily Ma committed on
Commit
0b152c3
·
1 Parent(s): caa113d

used tf-idf similarity

Browse files
Files changed (2) hide show
  1. app.py +42 -38
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,45 +1,49 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- # Simulated local storage for clustered emails
4
- email_clusters = {}
5
-
6
- def cluster_email(text: str) -> dict:
7
- """
8
- Assign a topic label to the email and store it in a local cluster dictionary.
9
-
10
- Args:
11
- text (str): The email content
12
-
13
- Returns:
14
- dict: Topic label and current cluster state
15
- """
16
- # Simple keyword-based topic detection
17
- if "build" in text.lower():
18
- topic = "Build"
19
- elif "agent" in text.lower():
20
- topic = "Agents"
21
- elif "cluster" in text.lower():
22
- topic = "Clustering"
23
- else:
24
- topic = "General"
25
-
26
- # Store in cluster
27
- email_clusters.setdefault(topic, []).append(text)
28
-
29
- return {
30
- "assigned_topic": topic,
31
- "cluster_size": len(email_clusters[topic]),
32
- "all_topics": list(email_clusters.keys())
33
- }
34
-
35
- # Gradio interface
36
  demo = gr.Interface(
37
- fn=cluster_email,
38
- inputs=gr.Textbox(placeholder="Paste email content here..."),
39
  outputs=gr.JSON(),
40
- title="Email Topic Classifier",
41
- description="Assigns a topic to an email and stores it in a local cluster"
42
  )
43
 
44
  if __name__ == "__main__":
45
- demo.launch(mcp_server=True)
 
1
  import gradio as gr
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+
5
+ # Pre-existing emails
6
+ pre_existing_emails = [
7
+ "Build system failed due to missing dependencies.",
8
+ "Agent deployment was successful on the new cluster.",
9
+ "Cluster performance metrics are above expectations.",
10
+ "General inquiry about the project timeline.",
11
+ "Build logs indicate a configuration error.",
12
+ "Agent is not responding after the last update.",
13
+ "Cluster nodes need to be restarted.",
14
+ "General feedback on the new UI.",
15
+ "Build completed successfully.",
16
+ "Agent registration failed due to timeout."
17
+ ]
18
+
19
+ # Fit TF-IDF vectorizer on pre-existing emails
20
+ vectorizer = TfidfVectorizer().fit(pre_existing_emails)
21
+ email_vectors = vectorizer.transform(pre_existing_emails)
22
+
23
+ def semantic_search(query: str) -> dict:
24
+ # Vectorize the query
25
+ query_vec = vectorizer.transform([query])
26
+ # Compute cosine similarity
27
+ similarities = cosine_similarity(query_vec, email_vectors).flatten()
28
+ # Get indices of top 5 similar emails
29
+ top_indices = similarities.argsort()[-5:][::-1]
30
+ # Prepare results
31
+ results = [
32
+ {
33
+ "email": pre_existing_emails[i],
34
+ "similarity": float(similarities[i])
35
+ }
36
+ for i in top_indices
37
+ ]
38
+ return {"results": results}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  demo = gr.Interface(
41
+ fn=semantic_search,
42
+ inputs=gr.Textbox(placeholder="Enter your email or query..."),
43
  outputs=gr.JSON(),
44
+ title="Email Semantic Search",
45
+ description="Returns the top 5 most similar pre-existing emails based on your query."
46
  )
47
 
48
  if __name__ == "__main__":
49
+ demo.launch(mcp_server=True)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio[mcp]
2
- textblob
 
 
1
  gradio[mcp]
2
+ textblob
3
+ scikit-learn