1littlecoder committed
Commit bef7cb9 · verified · Parent: 1dee282

Update app.py

Files changed (1)
  1. app.py (+55, -53)
app.py CHANGED
@@ -26,60 +26,7 @@ def topk_candidates(query, candidates, k):
 
 def create_gradio_interface():
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
- gr.Markdown("""
- # WordLlama Gradio Demo
-
- **WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
-
- For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
-
- ## Examples
-
- **Calculate Similarity**
-
- ```python
- from wordllama import WordLlama
-
- # Load the default WordLlama model
- wl = WordLlama.load()
-
- # Calculate similarity between two sentences
- similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
- print(similarity_score) # Output: 0.06641249096796882
- ```
-
- **Rank Documents**
-
- ```python
- query = "i went to the car"
- candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
- ranked_docs = wl.rank(query, candidates)
- print(ranked_docs)
- # Output:
- # [
- # ('i went to the vehicle', 0.7441646856486314),
- # ('i went to the truck', 0.2832691551894259),
- # ('i went to the shop', 0.19732814982305436),
- # ('i went to the park', 0.15101404519322253)
- # ]
- ```
-
- **Additional Inference Methods**
-
- ```python
- # Fuzzy Deduplication
- wl.deduplicate(candidates, threshold=0.8)
-
- # Clustering with K-means
- wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
-
- # Filtering Candidates
- wl.filter(query, candidates, threshold=0.3)
-
- # Top-k Candidates
- wl.topk(query, candidates, k=3)
- ```
- """)
 
 with gr.Tab("Similarity"):
 with gr.Row():
@@ -179,6 +126,61 @@ def create_gradio_interface():
 inputs=[topk_query, candidates_topk, k],
 )
 
+ gr.Markdown("""
+ # WordLlama Gradio Demo
+
+ **WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
+
+ For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
+
+ ## Examples
+
+ **Calculate Similarity**
+
+ ```python
+ from wordllama import WordLlama
+
+ # Load the default WordLlama model
+ wl = WordLlama.load()
+
+ # Calculate similarity between two sentences
+ similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
+ print(similarity_score) # Output: 0.06641249096796882
+ ```
+
+ **Rank Documents**
+
+ ```python
+ query = "i went to the car"
+ candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
+ ranked_docs = wl.rank(query, candidates)
+ print(ranked_docs)
+ # Output:
+ # [
+ # ('i went to the vehicle', 0.7441646856486314),
+ # ('i went to the truck', 0.2832691551894259),
+ # ('i went to the shop', 0.19732814982305436),
+ # ('i went to the park', 0.15101404519322253)
+ # ]
+ ```
+
+ **Additional Inference Methods**
+
+ ```python
+ # Fuzzy Deduplication
+ wl.deduplicate(candidates, threshold=0.8)
+
+ # Clustering with K-means
+ wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
+
+ # Filtering Candidates
+ wl.filter(query, candidates, threshold=0.3)
+
+ # Top-k Candidates
+ wl.topk(query, candidates, k=3)
+ ```
+ """)
+
 return demo
 
 # Create and launch the Gradio interface
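
In effect, the commit moves the documentation `gr.Markdown` block from the top of the `gr.Blocks` layout to the bottom, so the tabs render first and the usage notes follow. Below is a minimal sketch of the resulting shape of `create_gradio_interface()`; the tab contents, the event wiring that feeds `inputs=[topk_query, candidates_topk, k]`, and the launch call are outside these hunks, so those parts are placeholders or assumptions, not the app's actual code.

```python
import gradio as gr

def create_gradio_interface():
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        # The interactive tabs are built first. Only the "Similarity" tab name
        # appears in the diff context; its components and the wiring of the
        # top-k inputs are elided here.
        with gr.Tab("Similarity"):
            with gr.Row():
                ...  # similarity inputs/outputs live here in the real app

        # After this commit, the WordLlama usage notes are rendered *below*
        # the tabs instead of above them.
        gr.Markdown("""
# WordLlama Gradio Demo
(usage examples as shown in the added hunk above)
""")

    return demo

# Assumed launch pattern; the actual call sits outside the diff hunks.
if __name__ == "__main__":
    create_gradio_interface().launch()
```

Rendering the long documentation block after the tabs keeps the interactive widgets at the top of the page, which is presumably the motivation for the reordering.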