Spaces:
Runtime error
Runtime error
1littlecoder
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -26,60 +26,7 @@ def topk_candidates(query, candidates, k):
|
|
26 |
|
27 |
def create_gradio_interface():
|
28 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
29 |
-
gr.Markdown("""
|
30 |
-
# WordLlama Gradio Demo
|
31 |
|
32 |
-
**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
|
33 |
-
|
34 |
-
For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
|
35 |
-
|
36 |
-
## Examples
|
37 |
-
|
38 |
-
**Calculate Similarity**
|
39 |
-
|
40 |
-
```python
|
41 |
-
from wordllama import WordLlama
|
42 |
-
|
43 |
-
# Load the default WordLlama model
|
44 |
-
wl = WordLlama.load()
|
45 |
-
|
46 |
-
# Calculate similarity between two sentences
|
47 |
-
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
|
48 |
-
print(similarity_score) # Output: 0.06641249096796882
|
49 |
-
```
|
50 |
-
|
51 |
-
**Rank Documents**
|
52 |
-
|
53 |
-
```python
|
54 |
-
query = "i went to the car"
|
55 |
-
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
|
56 |
-
ranked_docs = wl.rank(query, candidates)
|
57 |
-
print(ranked_docs)
|
58 |
-
# Output:
|
59 |
-
# [
|
60 |
-
# ('i went to the vehicle', 0.7441646856486314),
|
61 |
-
# ('i went to the truck', 0.2832691551894259),
|
62 |
-
# ('i went to the shop', 0.19732814982305436),
|
63 |
-
# ('i went to the park', 0.15101404519322253)
|
64 |
-
# ]
|
65 |
-
```
|
66 |
-
|
67 |
-
**Additional Inference Methods**
|
68 |
-
|
69 |
-
```python
|
70 |
-
# Fuzzy Deduplication
|
71 |
-
wl.deduplicate(candidates, threshold=0.8)
|
72 |
-
|
73 |
-
# Clustering with K-means
|
74 |
-
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
|
75 |
-
|
76 |
-
# Filtering Candidates
|
77 |
-
wl.filter(query, candidates, threshold=0.3)
|
78 |
-
|
79 |
-
# Top-k Candidates
|
80 |
-
wl.topk(query, candidates, k=3)
|
81 |
-
```
|
82 |
-
""")
|
83 |
|
84 |
with gr.Tab("Similarity"):
|
85 |
with gr.Row():
|
@@ -179,6 +126,61 @@ def create_gradio_interface():
|
|
179 |
inputs=[topk_query, candidates_topk, k],
|
180 |
)
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
return demo
|
183 |
|
184 |
# Create and launch the Gradio interface
|
|
|
26 |
|
27 |
def create_gradio_interface():
|
28 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
with gr.Tab("Similarity"):
|
32 |
with gr.Row():
|
|
|
126 |
inputs=[topk_query, candidates_topk, k],
|
127 |
)
|
128 |
|
129 |
+
gr.Markdown("""
|
130 |
+
# WordLlama Gradio Demo
|
131 |
+
|
132 |
+
**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
|
133 |
+
|
134 |
+
For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
|
135 |
+
|
136 |
+
## Examples
|
137 |
+
|
138 |
+
**Calculate Similarity**
|
139 |
+
|
140 |
+
```python
|
141 |
+
from wordllama import WordLlama
|
142 |
+
|
143 |
+
# Load the default WordLlama model
|
144 |
+
wl = WordLlama.load()
|
145 |
+
|
146 |
+
# Calculate similarity between two sentences
|
147 |
+
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
|
148 |
+
print(similarity_score) # Output: 0.06641249096796882
|
149 |
+
```
|
150 |
+
|
151 |
+
**Rank Documents**
|
152 |
+
|
153 |
+
```python
|
154 |
+
query = "i went to the car"
|
155 |
+
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
|
156 |
+
ranked_docs = wl.rank(query, candidates)
|
157 |
+
print(ranked_docs)
|
158 |
+
# Output:
|
159 |
+
# [
|
160 |
+
# ('i went to the vehicle', 0.7441646856486314),
|
161 |
+
# ('i went to the truck', 0.2832691551894259),
|
162 |
+
# ('i went to the shop', 0.19732814982305436),
|
163 |
+
# ('i went to the park', 0.15101404519322253)
|
164 |
+
# ]
|
165 |
+
```
|
166 |
+
|
167 |
+
**Additional Inference Methods**
|
168 |
+
|
169 |
+
```python
|
170 |
+
# Fuzzy Deduplication
|
171 |
+
wl.deduplicate(candidates, threshold=0.8)
|
172 |
+
|
173 |
+
# Clustering with K-means
|
174 |
+
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
|
175 |
+
|
176 |
+
# Filtering Candidates
|
177 |
+
wl.filter(query, candidates, threshold=0.3)
|
178 |
+
|
179 |
+
# Top-k Candidates
|
180 |
+
wl.topk(query, candidates, k=3)
|
181 |
+
```
|
182 |
+
""")
|
183 |
+
|
184 |
return demo
|
185 |
|
186 |
# Create and launch the Gradio interface
|