|
import gradio as gr |
|
from wordllama import WordLlama |
|
|
|
|
|
# Load WordLlama once at import time with its default arguments; every
# handler below (similarity, rank, deduplicate, cluster) shares this instance.
wl = WordLlama.load()
|
|
|
|
|
def calculate_similarity(text1, text2):
    """Return a display string with the WordLlama similarity of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The string ``"Similarity Score: <score>"``, where ``<score>`` is the
        value returned by ``wl.similarity(text1, text2)`` rendered with
        default f-string formatting.
    """
    return f"Similarity Score: {wl.similarity(text1, text2)}"
|
|
|
|
|
def rank_documents(query: str, candidates: str):
    """Rank semicolon-separated candidate documents against a query.

    Args:
        query: Text the candidates are ranked against.
        candidates: Candidate documents joined by ``;``. Whitespace around
            each entry is stripped and empty entries (for example from a
            trailing semicolon or an empty textbox) are ignored.

    Returns:
        The result of ``wl.rank(query, candidates_list)``, or an empty list
        when no non-empty candidate was supplied.
    """
    # A bare split(";") keeps the padding in "a; b" and produces "" for a
    # trailing separator, so normalise the entries before ranking.
    candidates_list = [
        entry.strip() for entry in (candidates or "").split(";") if entry.strip()
    ]
    if not candidates_list:
        return []
    return wl.rank(query, candidates_list)
|
|
|
|
|
def deduplicate_docs(candidates: str, threshold: float):
    """Remove near-duplicate entries from semicolon-separated documents.

    Args:
        candidates: Documents joined by ``;``. Whitespace around each entry
            is stripped and empty entries (for example from a trailing
            semicolon or an empty textbox) are ignored.
        threshold: Passed through unchanged as the ``threshold`` keyword of
            ``wl.deduplicate``.

    Returns:
        The result of ``wl.deduplicate(candidates_list, threshold=threshold)``,
        or an empty list when no non-empty document was supplied.
    """
    # A bare split(";") keeps the padding in "a; b" and produces "" for a
    # trailing separator; padded or empty entries would otherwise be treated
    # as documents in their own right.
    candidates_list = [
        entry.strip() for entry in (candidates or "").split(";") if entry.strip()
    ]
    if not candidates_list:
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)
|
|
|
|
|
def cluster_docs(docs: str, k: float, max_iterations: float, tolerance: float):
    """Cluster semicolon-separated documents with WordLlama.

    Args:
        docs: Documents joined by ``;``. Whitespace around each entry is
            stripped and empty entries are ignored.
        k: Number of clusters. Coerced with ``int()`` (fractional values are
            truncated) because a numeric UI field may deliver a float such
            as ``5.0``.
        max_iterations: Iteration cap, coerced with ``int()`` for the same
            reason as ``k``.
        tolerance: Passed through unchanged as the ``tolerance`` keyword of
            ``wl.cluster``.

    Returns:
        The result of ``wl.cluster(...)`` for the cleaned document list.

    Raises:
        ValueError: If no non-empty document was supplied.
    """
    # A bare split(";") keeps the padding in "a; b" and produces "" for a
    # trailing separator, so normalise the entries before clustering.
    docs_list = [entry.strip() for entry in (docs or "").split(";") if entry.strip()]
    if not docs_list:
        raise ValueError("No documents provided to cluster.")

    # k and max_iterations are counts; make sure they reach the clustering
    # routine as ints even when the caller hands over floats.
    return wl.cluster(
        docs_list,
        k=int(k),
        max_iterations=int(max_iterations),
        tolerance=tolerance,
    )
|
|
|
|
|
# Build the UI: one tab per WordLlama operation, each wiring a button to the
# matching handler function defined above.
with gr.Blocks() as demo:
    gr.Markdown("# WordLlama Gradio App")

    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, candidates],
            outputs=rank_output,
        )

    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        # Rebinds the name `candidates`; the Rank tab's click handler was
        # registered with its own textbox object above, so the tabs stay
        # independent.
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[candidates, threshold],
            outputs=deduplicate_output,
        )

    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes Gradio round these inputs and pass them to the
        # handler as ints; both values are counts, not real numbers.
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(label="Max Iterations", value=100, precision=0)
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )

# Start the Gradio server as soon as the script is executed.
demo.launch()
|
|