Spaces:
Runtime error
Runtime error
1littlecoder
committed on
Commit
•
1dee282
1
Parent(s):
152990b
Update app.py
Browse files
app.py
CHANGED
@@ -25,61 +25,159 @@ def topk_candidates(query, candidates, k):
|
|
25 |
return topk
|
26 |
|
27 |
def create_gradio_interface():
|
28 |
-
with gr.Blocks() as demo:
|
29 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
with gr.Tab("Similarity"):
|
32 |
with gr.Row():
|
33 |
sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
|
34 |
sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
|
35 |
similarity_output = gr.Number(label="Similarity Score")
|
36 |
-
gr.Button("Calculate Similarity")
|
|
|
37 |
fn=calculate_similarity,
|
38 |
inputs=[sentence1, sentence2],
|
39 |
outputs=[similarity_output]
|
40 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
with gr.Tab("Rank Documents"):
|
43 |
query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
44 |
candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
45 |
ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
|
46 |
-
gr.Button("Rank Documents")
|
|
|
47 |
fn=lambda q, c: rank_documents(q, c.split(',')),
|
48 |
inputs=[query, candidates],
|
49 |
outputs=[ranked_docs_output]
|
50 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
with gr.Tab("Deduplicate Candidates"):
|
53 |
candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
54 |
threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
|
55 |
deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
|
56 |
-
gr.Button("Deduplicate")
|
|
|
57 |
fn=lambda c, t: deduplicate_candidates(c.split(','), t),
|
58 |
inputs=[candidates_dedup, threshold_dedup],
|
59 |
outputs=[deduplicated_output]
|
60 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
with gr.Tab("Filter Candidates"):
|
63 |
filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
64 |
candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
65 |
threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
|
66 |
filtered_output = gr.Textbox(label="Filtered Candidates")
|
67 |
-
gr.Button("Filter Candidates")
|
|
|
68 |
fn=lambda q, c, t: filter_candidates(q, c.split(','), t),
|
69 |
inputs=[filter_query, candidates_filter, threshold_filter],
|
70 |
outputs=[filtered_output]
|
71 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
with gr.Tab("Top-k Candidates"):
|
74 |
topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
75 |
candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
76 |
k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
|
77 |
topk_output = gr.Textbox(label="Top-k Candidates")
|
78 |
-
gr.Button("Get Top-k Candidates")
|
|
|
79 |
fn=lambda q, c, k: topk_candidates(q, c.split(','), k),
|
80 |
inputs=[topk_query, candidates_topk, k],
|
81 |
outputs=[topk_output]
|
82 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
return demo
|
85 |
|
|
|
25 |
return topk
|
26 |
|
27 |
def create_gradio_interface():
|
28 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
29 |
+
gr.Markdown("""
|
30 |
+
# WordLlama Gradio Demo
|
31 |
+
|
32 |
+
**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
|
33 |
+
|
34 |
+
For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
|
35 |
+
|
36 |
+
## Examples
|
37 |
+
|
38 |
+
**Calculate Similarity**
|
39 |
+
|
40 |
+
```python
|
41 |
+
from wordllama import WordLlama
|
42 |
+
|
43 |
+
# Load the default WordLlama model
|
44 |
+
wl = WordLlama.load()
|
45 |
+
|
46 |
+
# Calculate similarity between two sentences
|
47 |
+
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
|
48 |
+
print(similarity_score) # Output: 0.06641249096796882
|
49 |
+
```
|
50 |
+
|
51 |
+
**Rank Documents**
|
52 |
+
|
53 |
+
```python
|
54 |
+
query = "i went to the car"
|
55 |
+
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
|
56 |
+
ranked_docs = wl.rank(query, candidates)
|
57 |
+
print(ranked_docs)
|
58 |
+
# Output:
|
59 |
+
# [
|
60 |
+
# ('i went to the vehicle', 0.7441646856486314),
|
61 |
+
# ('i went to the truck', 0.2832691551894259),
|
62 |
+
# ('i went to the shop', 0.19732814982305436),
|
63 |
+
# ('i went to the park', 0.15101404519322253)
|
64 |
+
# ]
|
65 |
+
```
|
66 |
+
|
67 |
+
**Additional Inference Methods**
|
68 |
+
|
69 |
+
```python
|
70 |
+
# Fuzzy Deduplication
|
71 |
+
wl.deduplicate(candidates, threshold=0.8)
|
72 |
+
|
73 |
+
# Clustering with K-means
|
74 |
+
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
|
75 |
+
|
76 |
+
# Filtering Candidates
|
77 |
+
wl.filter(query, candidates, threshold=0.3)
|
78 |
+
|
79 |
+
# Top-k Candidates
|
80 |
+
wl.topk(query, candidates, k=3)
|
81 |
+
```
|
82 |
+
""")
|
83 |
|
84 |
with gr.Tab("Similarity"):
|
85 |
with gr.Row():
|
86 |
sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
|
87 |
sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
|
88 |
similarity_output = gr.Number(label="Similarity Score")
|
89 |
+
submit_similarity_btn = gr.Button("Calculate Similarity")
|
90 |
+
submit_similarity_btn.click(
|
91 |
fn=calculate_similarity,
|
92 |
inputs=[sentence1, sentence2],
|
93 |
outputs=[similarity_output]
|
94 |
)
|
95 |
+
examples_similarity = gr.Examples(
|
96 |
+
examples=[
|
97 |
+
["I love programming.", "I enjoy coding."],
|
98 |
+
["The weather is sunny.", "It's a bright day."],
|
99 |
+
["I need coffee.", "I'm looking for a coffee shop."]
|
100 |
+
],
|
101 |
+
inputs=[sentence1, sentence2],
|
102 |
+
)
|
103 |
|
104 |
with gr.Tab("Rank Documents"):
|
105 |
query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
106 |
candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
107 |
ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
|
108 |
+
submit_rank_btn = gr.Button("Rank Documents")
|
109 |
+
submit_rank_btn.click(
|
110 |
fn=lambda q, c: rank_documents(q, c.split(',')),
|
111 |
inputs=[query, candidates],
|
112 |
outputs=[ranked_docs_output]
|
113 |
)
|
114 |
+
examples_rank = gr.Examples(
|
115 |
+
examples=[
|
116 |
+
["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle"],
|
117 |
+
["Looking for a restaurant", "I need food, I'm hungry, I want to eat, Let's find a place to eat"],
|
118 |
+
["Best programming languages", "Python, JavaScript, Java, C++"]
|
119 |
+
],
|
120 |
+
inputs=[query, candidates],
|
121 |
+
)
|
122 |
|
123 |
with gr.Tab("Deduplicate Candidates"):
|
124 |
candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
125 |
threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
|
126 |
deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
|
127 |
+
submit_dedup_btn = gr.Button("Deduplicate")
|
128 |
+
submit_dedup_btn.click(
|
129 |
fn=lambda c, t: deduplicate_candidates(c.split(','), t),
|
130 |
inputs=[candidates_dedup, threshold_dedup],
|
131 |
outputs=[deduplicated_output]
|
132 |
)
|
133 |
+
examples_dedup = gr.Examples(
|
134 |
+
examples=[
|
135 |
+
["apple, apple, orange, banana", 0.8],
|
136 |
+
["cat, dog, cat, bird, dog", 0.9],
|
137 |
+
["text, text, more text, text", 0.7]
|
138 |
+
],
|
139 |
+
inputs=[candidates_dedup, threshold_dedup],
|
140 |
+
)
|
141 |
|
142 |
with gr.Tab("Filter Candidates"):
|
143 |
filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
144 |
candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
145 |
threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
|
146 |
filtered_output = gr.Textbox(label="Filtered Candidates")
|
147 |
+
submit_filter_btn = gr.Button("Filter Candidates")
|
148 |
+
submit_filter_btn.click(
|
149 |
fn=lambda q, c, t: filter_candidates(q, c.split(','), t),
|
150 |
inputs=[filter_query, candidates_filter, threshold_filter],
|
151 |
outputs=[filtered_output]
|
152 |
)
|
153 |
+
examples_filter = gr.Examples(
|
154 |
+
examples=[
|
155 |
+
["I went to the car", "I went to the park, I went to the shop, I went to the truck", 0.3],
|
156 |
+
["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 0.4],
|
157 |
+
["Best programming languages", "Python, JavaScript, Java, C++", 0.5]
|
158 |
+
],
|
159 |
+
inputs=[filter_query, candidates_filter, threshold_filter],
|
160 |
+
)
|
161 |
|
162 |
with gr.Tab("Top-k Candidates"):
|
163 |
topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
|
164 |
candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
|
165 |
k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
|
166 |
topk_output = gr.Textbox(label="Top-k Candidates")
|
167 |
+
submit_topk_btn = gr.Button("Get Top-k Candidates")
|
168 |
+
submit_topk_btn.click(
|
169 |
fn=lambda q, c, k: topk_candidates(q, c.split(','), k),
|
170 |
inputs=[topk_query, candidates_topk, k],
|
171 |
outputs=[topk_output]
|
172 |
)
|
173 |
+
examples_topk = gr.Examples(
|
174 |
+
examples=[
|
175 |
+
["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle", 3],
|
176 |
+
["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 2],
|
177 |
+
["Best programming languages", "Python, JavaScript, Java, C++", 4]
|
178 |
+
],
|
179 |
+
inputs=[topk_query, candidates_topk, k],
|
180 |
+
)
|
181 |
|
182 |
return demo
|
183 |
|