1littlecoder committed on
Commit
1dee282
1 Parent(s): 152990b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -7
app.py CHANGED
@@ -25,61 +25,159 @@ def topk_candidates(query, candidates, k):
25
  return topk
26
 
27
  def create_gradio_interface():
28
- with gr.Blocks() as demo:
29
- gr.Markdown("## WordLlama Gradio Demo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  with gr.Tab("Similarity"):
32
  with gr.Row():
33
  sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
34
  sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
35
  similarity_output = gr.Number(label="Similarity Score")
36
- gr.Button("Calculate Similarity").click(
 
37
  fn=calculate_similarity,
38
  inputs=[sentence1, sentence2],
39
  outputs=[similarity_output]
40
  )
 
 
 
 
 
 
 
 
41
 
42
  with gr.Tab("Rank Documents"):
43
  query = gr.Textbox(label="Query", placeholder="Enter the query here...")
44
  candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
45
  ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
46
- gr.Button("Rank Documents").click(
 
47
  fn=lambda q, c: rank_documents(q, c.split(',')),
48
  inputs=[query, candidates],
49
  outputs=[ranked_docs_output]
50
  )
 
 
 
 
 
 
 
 
51
 
52
  with gr.Tab("Deduplicate Candidates"):
53
  candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
54
  threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
55
  deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
56
- gr.Button("Deduplicate").click(
 
57
  fn=lambda c, t: deduplicate_candidates(c.split(','), t),
58
  inputs=[candidates_dedup, threshold_dedup],
59
  outputs=[deduplicated_output]
60
  )
 
 
 
 
 
 
 
 
61
 
62
  with gr.Tab("Filter Candidates"):
63
  filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
64
  candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
65
  threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
66
  filtered_output = gr.Textbox(label="Filtered Candidates")
67
- gr.Button("Filter Candidates").click(
 
68
  fn=lambda q, c, t: filter_candidates(q, c.split(','), t),
69
  inputs=[filter_query, candidates_filter, threshold_filter],
70
  outputs=[filtered_output]
71
  )
 
 
 
 
 
 
 
 
72
 
73
  with gr.Tab("Top-k Candidates"):
74
  topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
75
  candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
76
  k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
77
  topk_output = gr.Textbox(label="Top-k Candidates")
78
- gr.Button("Get Top-k Candidates").click(
 
79
  fn=lambda q, c, k: topk_candidates(q, c.split(','), k),
80
  inputs=[topk_query, candidates_topk, k],
81
  outputs=[topk_output]
82
  )
 
 
 
 
 
 
 
 
83
 
84
  return demo
85
 
 
25
  return topk
26
 
27
  def create_gradio_interface():
28
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
29
+ gr.Markdown("""
30
+ # WordLlama Gradio Demo
31
+
32
+ **WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
33
+
34
+ For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
35
+
36
+ ## Examples
37
+
38
+ **Calculate Similarity**
39
+
40
+ ```python
41
+ from wordllama import WordLlama
42
+
43
+ # Load the default WordLlama model
44
+ wl = WordLlama.load()
45
+
46
+ # Calculate similarity between two sentences
47
+ similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
48
+ print(similarity_score) # Output: 0.06641249096796882
49
+ ```
50
+
51
+ **Rank Documents**
52
+
53
+ ```python
54
+ query = "i went to the car"
55
+ candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
56
+ ranked_docs = wl.rank(query, candidates)
57
+ print(ranked_docs)
58
+ # Output:
59
+ # [
60
+ # ('i went to the vehicle', 0.7441646856486314),
61
+ # ('i went to the truck', 0.2832691551894259),
62
+ # ('i went to the shop', 0.19732814982305436),
63
+ # ('i went to the park', 0.15101404519322253)
64
+ # ]
65
+ ```
66
+
67
+ **Additional Inference Methods**
68
+
69
+ ```python
70
+ # Fuzzy Deduplication
71
+ wl.deduplicate(candidates, threshold=0.8)
72
+
73
+ # Clustering with K-means
74
+ wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
75
+
76
+ # Filtering Candidates
77
+ wl.filter(query, candidates, threshold=0.3)
78
+
79
+ # Top-k Candidates
80
+ wl.topk(query, candidates, k=3)
81
+ ```
82
+ """)
83
 
84
  with gr.Tab("Similarity"):
85
  with gr.Row():
86
  sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
87
  sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
88
  similarity_output = gr.Number(label="Similarity Score")
89
+ submit_similarity_btn = gr.Button("Calculate Similarity")
90
+ submit_similarity_btn.click(
91
  fn=calculate_similarity,
92
  inputs=[sentence1, sentence2],
93
  outputs=[similarity_output]
94
  )
95
+ examples_similarity = gr.Examples(
96
+ examples=[
97
+ ["I love programming.", "I enjoy coding."],
98
+ ["The weather is sunny.", "It's a bright day."],
99
+ ["I need coffee.", "I'm looking for a coffee shop."]
100
+ ],
101
+ inputs=[sentence1, sentence2],
102
+ )
103
 
104
  with gr.Tab("Rank Documents"):
105
  query = gr.Textbox(label="Query", placeholder="Enter the query here...")
106
  candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
107
  ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
108
+ submit_rank_btn = gr.Button("Rank Documents")
109
+ submit_rank_btn.click(
110
  fn=lambda q, c: rank_documents(q, c.split(',')),
111
  inputs=[query, candidates],
112
  outputs=[ranked_docs_output]
113
  )
114
+ examples_rank = gr.Examples(
115
+ examples=[
116
+ ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle"],
117
+ ["Looking for a restaurant", "I need food, I'm hungry, I want to eat, Let's find a place to eat"],
118
+ ["Best programming languages", "Python, JavaScript, Java, C++"]
119
+ ],
120
+ inputs=[query, candidates],
121
+ )
122
 
123
  with gr.Tab("Deduplicate Candidates"):
124
  candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
125
  threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
126
  deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
127
+ submit_dedup_btn = gr.Button("Deduplicate")
128
+ submit_dedup_btn.click(
129
  fn=lambda c, t: deduplicate_candidates(c.split(','), t),
130
  inputs=[candidates_dedup, threshold_dedup],
131
  outputs=[deduplicated_output]
132
  )
133
+ examples_dedup = gr.Examples(
134
+ examples=[
135
+ ["apple, apple, orange, banana", 0.8],
136
+ ["cat, dog, cat, bird, dog", 0.9],
137
+ ["text, text, more text, text", 0.7]
138
+ ],
139
+ inputs=[candidates_dedup, threshold_dedup],
140
+ )
141
 
142
  with gr.Tab("Filter Candidates"):
143
  filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
144
  candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
145
  threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
146
  filtered_output = gr.Textbox(label="Filtered Candidates")
147
+ submit_filter_btn = gr.Button("Filter Candidates")
148
+ submit_filter_btn.click(
149
  fn=lambda q, c, t: filter_candidates(q, c.split(','), t),
150
  inputs=[filter_query, candidates_filter, threshold_filter],
151
  outputs=[filtered_output]
152
  )
153
+ examples_filter = gr.Examples(
154
+ examples=[
155
+ ["I went to the car", "I went to the park, I went to the shop, I went to the truck", 0.3],
156
+ ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 0.4],
157
+ ["Best programming languages", "Python, JavaScript, Java, C++", 0.5]
158
+ ],
159
+ inputs=[filter_query, candidates_filter, threshold_filter],
160
+ )
161
 
162
  with gr.Tab("Top-k Candidates"):
163
  topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
164
  candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
165
  k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
166
  topk_output = gr.Textbox(label="Top-k Candidates")
167
+ submit_topk_btn = gr.Button("Get Top-k Candidates")
168
+ submit_topk_btn.click(
169
  fn=lambda q, c, k: topk_candidates(q, c.split(','), k),
170
  inputs=[topk_query, candidates_topk, k],
171
  outputs=[topk_output]
172
  )
173
+ examples_topk = gr.Examples(
174
+ examples=[
175
+ ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle", 3],
176
+ ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 2],
177
+ ["Best programming languages", "Python, JavaScript, Java, C++", 4]
178
+ ],
179
+ inputs=[topk_query, candidates_topk, k],
180
+ )
181
 
182
  return demo
183