Spaces:

jvamvas
/

romansh-idiom-classification

Running

App Files Files Community

jvamvas committed on 26 days ago

Commit

1b40346

1 Parent(s): 4463fb4

Implement app

Browse files

Files changed (3) hide show

app.py +110 -0
classification_model/svm_char_word.joblib +3 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import re
+import gradio as gr
+from pathlib import Path
+from joblib import load
+# Load the classifier inside the NO_RELOAD guard so it is loaded only once during development (reload mode); joblib.load unpickles the file, so it must come from a trusted source
+if gr.NO_RELOAD:
+    classifier = load(Path(__file__).parent / "classification_model" / "svm_char_word.joblib")
+def classify_text(text: str):
+    """Classify Romansh text and return prediction with probabilities."""
+    if not text.strip():
+        return "Please enter some text to classify."
+    # Map predictions to readable language names
+    language_names = {
+        'rm-sursilv': 'Sursilvan',
+        'rm-vallader': 'Vallader',
+        'rm-rumgr': 'Rumantsch Grischun',
+        'rm-surmiran': 'Surmiran',
+        'rm-puter': 'Puter',
+        'rm-sutsilv': 'Sutsilvan',
+        'unknown': 'Unknown'
+    }
+    # Get class labels from the classifier
+    class_labels = classifier.classes_
+    # Try to get probabilities if available, otherwise use decision function
+    try:
+        probabilities = classifier.predict_proba([text])[0]
+        # Create result dictionary with probabilities
+        result = {}
+        for i, label in enumerate(class_labels):
+            readable_name = language_names.get(label, label)
+            result[readable_name] = float(probabilities[i])
+    except AttributeError:
+        # LinearSVC doesn't have predict_proba, use decision function instead
+        decision_scores = classifier.decision_function([text])[0]
+        # Convert decision scores to probabilities using softmax
+        import numpy as np
+        exp_scores = np.exp(decision_scores - np.max(decision_scores))
+        probabilities = exp_scores / np.sum(exp_scores)
+        result = {}
+        for i, label in enumerate(class_labels):
+            readable_name = language_names.get(label, label)
+            result[readable_name] = float(probabilities[i])
+    return result
+# Read examples from the TSV file
+import pandas as pd
+import os
+tsv_path = os.path.join(os.path.dirname(__file__), "..", "lemmatizer", "demo", "example_sentences.tsv")
+df = pd.read_csv(tsv_path, sep='\t')
+# Create a list of examples with their idiom labels
+examples_data = []
+for col in df.columns:
+    for sentence in df[col].dropna():
+        if sentence.strip():  # Skip empty sentences
+            examples_data.append((sentence, col))
+# Create the examples list and labels
+examples = [sentence for sentence, _ in examples_data]
+example_labels = [f"[{idiom}:] {sentence}" for sentence, idiom in examples_data]
+# Build the Gradio UI: a text box and a label output in two columns of one row
+with gr.Blocks(title="Romansh Idiom Classifier") as demo:
+    gr.Markdown("# Romansh Idiom Classifier")
+    gr.Markdown("Enter Romansh text to classify which idiom/variety it belongs to.")
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(
+                label="Romansh Text",
+                placeholder="Enter Romansh text here...",
+                lines=5,
+                max_lines=10
+            )
+        with gr.Column():
+            output = gr.Label(
+                label="Predicted Idiom",
+                num_top_classes=7
+            )
+    # Re-run classify_text whenever the textbox content changes
+    text_input.change(fn=classify_text, inputs=text_input, outputs=output)
+    # Offer the TSV sentences as clickable examples, displayed with their idiom-prefixed labels
+    gr.Examples(
+        examples=examples,
+        inputs=text_input,
+        label="Example Sentences",
+        example_labels=example_labels,
+        examples_per_page=100,
+        fn=classify_text,
+        outputs=output,
+        run_on_click=True,  # NOTE(review): Gradio documents this as having no effect when cache_examples is True - confirm
+        cache_examples=True,
+        cache_mode='eager',
+        preload=0,
+    )
+if __name__ == "__main__":
+    demo.launch()

classification_model/svm_char_word.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:580b6d0e0cc96adbcf322d67f4caa4e46e7c2afc8e14e8b32f00e1c77f93cd47
+size 47463929

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+joblib
+scikit-learn
+pandas