"""
Gradio app for Quora Duplicate Question Detector.
Deploy to Hugging Face Spaces with Gradio SDK.
"""
import sys
from pathlib import Path

# Put this file's directory and its "streamlit-app" subfolder at the front of
# sys.path. This has to run before `import helper` below, which is why the
# remaining imports are not grouped at the top of the file.
ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "streamlit-app"))

import nltk  # noqa: E402

# Fetch the NLTK stopwords corpus quietly at start-up, before `helper` is
# imported. Presumably helper's text preprocessing needs it - TODO confirm.
nltk.download("stopwords", quiet=True)

import helper  # noqa: E402  (resolved through the sys.path entries added above)

import gradio as gr  # noqa: E402
def predict_fn(q1: str, q2: str, model_name: str):
    """Classify a question pair and format the outcome for the UI.

    Args:
        q1: First question as entered by the user; ``None`` is treated as
            an empty string.
        q2: Second question; ``None`` is treated as an empty string.
        model_name: Label chosen in the model dropdown. A label containing
            ``"Classical"`` selects the classical model, any other label
            selects the transformer.

    Returns:
        A ``(message, probability)`` tuple. ``message`` is Markdown text
        describing the verdict; ``probability`` is the second value
        returned by ``helper.predict``. Validation failures and prediction
        errors never raise: they are reported through ``message`` with a
        probability of ``0.0``.
    """
    q1_clean = (q1 or "").strip()
    q2_clean = (q2 or "").strip()

    # Cheap input validation first, so the model is never called on junk.
    if not q1_clean or not q2_clean:
        return "⚠️ Please enter both questions.", 0.0
    if len(q1_clean) < 3 or len(q2_clean) < 3:
        return "⚠️ Questions should be at least 3 characters.", 0.0
    if not model_name:
        # Without this guard a missing selection surfaced as a raw TypeError
        # message from the membership test below.
        return "⚠️ Please select a model.", 0.0

    try:
        model_type = "classical" if "Classical" in model_name else "transformer"
        pred, proba = helper.predict(q1_clean, q2_clean, model_type)

        if pred:
            msg = "**Duplicate** — These questions likely have the same meaning."
        else:
            msg = "**Not Duplicate** — These questions appear to be different."

        return msg, proba
    except Exception as e:
        # UI boundary: show the failure in the result panel rather than
        # letting the exception propagate out of the event handler.
        return f"❌ Error: {e}", 0.0
# Ask the helper which models can be served; refuse to start without any.
available = helper.get_available_models()
if not available:
    raise RuntimeError("No models found. Add models to models/ or configure HF Hub download.")

# Build the dropdown labels: each model's display name, followed by its
# "mean_ms" inference time when the helper reports a non-zero value for it.
inference_times = helper.get_inference_times()
model_choices = [helper.get_model_display_name(m) for m in available]
model_choices_with_time = []
for m in model_choices:
    # Timings are looked up under "classical" or "transformer", chosen by
    # whether the display name contains "Classical".
    key = "classical" if "Classical" in m else "transformer"
    ms = inference_times.get(key, {}).get("mean_ms", 0)
    suffix = f" (~{ms:.0f} ms)" if ms else ""
    model_choices_with_time.append(f"{m}{suffix}")
# UI definition: inputs and the "Check" button in a wide left column, the
# verdict text and probability slider in a narrower right column, then
# example pairs and an "About" section in collapsed accordions.
with gr.Blocks(title="Quora Duplicate Detector", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the title glyph was unrecoverable mojibake in the source;
    # the magnifying glass is a best guess - confirm or replace.
    gr.Markdown("# 🔍 Quora Duplicate Question Pairs")
    gr.Markdown("Enter two questions to check if they are semantically duplicate.")

    with gr.Row():
        with gr.Column(scale=2):
            q1 = gr.Textbox(
                label="Question 1",
                placeholder="e.g. What is the capital of India?",
                lines=2,
            )
            q2 = gr.Textbox(
                label="Question 2",
                placeholder="e.g. Which city is India's capital?",
                lines=2,
            )
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=model_choices_with_time,
                value=model_choices_with_time[0],
            )
            check_btn = gr.Button("Check", variant="primary")
        with gr.Column(scale=1):
            result_text = gr.Markdown(value="")
            # Read-only slider used purely as a display for the probability.
            proba_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0,
                label="Probability of Duplicate",
                interactive=False,
            )

    with gr.Accordion("Try example pairs", open=False):
        # Examples fill only the two question boxes; the model selection is
        # left as the user set it.
        gr.Examples(
            examples=[
                ["How do I learn Python?", "What is the best way to learn Python programming?"],
                ["What is the capital of France?", "How do I cook pasta?"],
            ],
            inputs=[q1, q2],
            label="",
        )

    # predict_fn returns (message, probability), matching the two outputs.
    check_btn.click(
        fn=predict_fn,
        inputs=[q1, q2, model_dropdown],
        outputs=[result_text, proba_slider],
    )

    gr.Markdown("---")
    with gr.Accordion("About", open=False):
        # The text is kept flush-left so the Markdown never picks up leading
        # indentation from the surrounding code.
        gr.Markdown("""
This app predicts whether two Quora questions are duplicates (same meaning).

**Models:**
- **Classical**: Random Forest or XGBoost on 25 handcrafted features + TF-IDF
- **DistilBERT**: Fine-tuned transformer for sentence-pair classification

*Built for fun & learning. Results may not always be accurate — use with caution.*
""")
# Start the Gradio server with default settings. The call is left unguarded
# at module level so that executing or importing this file launches the app,
# exactly as before.
demo.launch()