Spaces:

kargaranamir
/

LangID-LIME

Running

App Files Files Community

kargaranamir committed on Aug 28, 2023

Commit

6dabd3f

1 Parent(s): 7ec6a67

add new app.

Browse files

Files changed (3) hide show

app.py +72 -29
app_legacy.py → app_v0.py +0 -0
app_v1.py +114 -0

app.py CHANGED Viewed

@@ -21,11 +21,35 @@ from selenium import webdriver
 from selenium.common.exceptions import WebDriverException
 import os
-# Load the FastText language identification model from Hugging Face Hub
-model_path = hf_hub_download(repo_id="facebook/fasttext-language-identification", filename="model.bin")
-# Create the FastText classifier
-classifier = _FastText(model_path)
 def remove_label_prefix(item):
     return item.replace('__label__', '')
@@ -36,20 +60,21 @@ def remove_label_prefix_list(input_list):
     else:
         return [remove_label_prefix(item) for item in input_list]
-class_names = remove_label_prefix_list(classifier.labels)
-class_names = np.sort(class_names)
-num_class = len(class_names)
-def tokenize_string(string):
-    return string.split()
-explainer = lime.lime_text.LimeTextExplainer(
-    split_expression=tokenize_string,
-    bow=False,
-    class_names=class_names
-)
-def fasttext_prediction_in_sklearn_format(classifier, texts):
     res = []
     labels, probabilities = classifier.predict(texts, num_class)
     labels = remove_label_prefix_list(labels)
@@ -58,11 +83,12 @@ def fasttext_prediction_in_sklearn_format(classifier, texts):
         res.append(probs[order])
     return np.array(res)
-def generate_explanation_html(input_sentence):
     preprocessed_sentence = input_sentence
     exp = explainer.explain_instance(
         preprocessed_sentence,
-        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
         top_labels=2,
         num_features=20,
     )
@@ -91,24 +117,41 @@ def take_screenshot(local_html_path):
     return Image.open(BytesIO(screenshot))
-def merge(input_sentence):
     input_sentence = input_sentence.replace('\n', ' ')
-    output_html_filename = generate_explanation_html(input_sentence)
     im = take_screenshot(output_html_filename)
     return im, output_html_filename
-input_sentence = gr.inputs.Textbox(label="Input Sentence")
 output_explanation = gr.outputs.File(label="Explanation HTML")
-iface = gr.Interface(
-    fn=merge,
-    inputs=input_sentence,
-    outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
-    title="LIME LID",
-    description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
-    allow_flagging='never'
-)
 iface.launch()

 from selenium.common.exceptions import WebDriverException
 import os
+# Define a dictionary to map model choices to their respective paths.
+# Each value is [Hugging Face Hub repo id, filename inside that repo];
+# load_classifier passes them to hf_hub_download as repo_id and filename.
+model_paths = {
+    "LID201": ["kargaranamir/LID201", 'model.bin'],
+    "BIGLID": ["kargaranamir/BIGLID", 'model.bin'],
+    # "FT176": ["kargaranamir/FT176", 'model.bin'],
+    "NLLB": ["facebook/fasttext-language-identification", 'model.bin']
+}
+# Create a dictionary to cache classifiers (model choice -> loaded _FastText object)
+cached_classifiers = {}
+def load_classifier(model_choice):
+    if model_choice in cached_classifiers:
+        return cached_classifiers[model_choice]
+    # Load the FastText language identification model from Hugging Face Hub
+    model_path = hf_hub_download(repo_id=model_paths[model_choice][0], filename=model_paths[model_choice][1])
+    # Create the FastText classifier
+    classifier = _FastText(model_path)
+    cached_classifiers[model_choice] = classifier
+    return classifier
+# cache all models
+for model_choice in model_paths.keys():
+    load_classifier(model_choice)
 def remove_label_prefix(item):
     """Return ``item`` with every ``__label__`` marker removed."""
     marker = '__label__'
     return item.replace(marker, '')
     else:
         return [remove_label_prefix(item) for item in input_list]
+def tokenize_string(sentence, n=None):
+    if n is None:
+        tokens = sentence.split()
+    else:
+        tokens = []
+        for i in range(len(sentence) - n + 1):
+            tokens.append(sentence[i:i + n])
+    return tokens
+def fasttext_prediction_in_sklearn_format(classifier, texts, num_class):
+    # if isinstance(texts, str):
+    #     texts = [texts]
     res = []
     labels, probabilities = classifier.predict(texts, num_class)
     labels = remove_label_prefix_list(labels)
         res.append(probs[order])
     return np.array(res)
+def generate_explanation_html(input_sentence, explainer, classifier, num_class):
     preprocessed_sentence = input_sentence
     exp = explainer.explain_instance(
         preprocessed_sentence,
+        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x, num_class),
         top_labels=2,
         num_features=20,
     )
     return Image.open(BytesIO(screenshot))
+# Define the merge function
+def merge_function(input_sentence, selected_model):
+    """Explain the prediction of ``selected_model`` on ``input_sentence``.
+
+    Returns a tuple ``(image, html_filename)``: the value returned by
+    ``take_screenshot`` for the explanation page, and the filename returned
+    by ``generate_explanation_html``.
+    """
+    # Newlines are replaced with spaces before the text is explained
     input_sentence = input_sentence.replace('\n', ' ')
+    # Get the FastText classifier for the selected model (cached by load_classifier)
+    classifier = load_classifier(selected_model)
+    # Class names are the model's labels without the __label__ prefix, sorted
+    class_names = remove_label_prefix_list(classifier.labels)
+    class_names = np.sort(class_names)
+    num_class = len(class_names)
+    # Load Lime
+    explainer = lime.lime_text.LimeTextExplainer(
+    split_expression=tokenize_string,
+    bow=False,
+    class_names=class_names)
+    # Generate output
+    output_html_filename = generate_explanation_html(input_sentence, explainer, classifier, num_class)
     im = take_screenshot(output_html_filename)
     return im, output_html_filename
+# Define the Gradio interface
+# Inputs: the text to explain and the model to use (default 'BIGLID').
+# Outputs: the explanation screenshot and the explanation HTML file.
+# NOTE(review): gr.inputs.Textbox / gr.outputs.File use the gr.inputs / gr.outputs
+# namespaces while gr.Radio and gr.Image are top-level components — confirm the
+# pinned Gradio version still provides both spellings.
+input_text = gr.inputs.Textbox(label="Input Text")
+model_choice = gr.Radio(choices=["BIGLID", "LID201", "NLLB"], label="Select Model",  value='BIGLID')
 output_explanation = gr.outputs.File(label="Explanation HTML")
+iface = gr.Interface(merge_function,
+                     inputs=[input_text, model_choice],
+                     outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
+                     title="LIME LID",
+                     description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
+                     allow_flagging='never')
 iface.launch()

app_legacy.py → app_v0.py RENAMED Viewed

File without changes

app_v1.py ADDED Viewed

	@@ -0,0 +1,114 @@

+# """
+# Author: Amir Hossein Kargaran
+# Date: August, 2023
+# Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.
+# MIT License
+# Some parts of the code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
+# """
+import gradio as gr
+from io import BytesIO
+from fasttext.FastText import _FastText
+import re
+import lime.lime_text
+import numpy as np
+from PIL import Image
+from huggingface_hub import hf_hub_download
+from selenium import webdriver
+from selenium.common.exceptions import WebDriverException
+import os
+# Download the FastText language identification model file from Hugging Face Hub
+# (this runs once, when the module is imported)
+model_path = hf_hub_download(repo_id="facebook/fasttext-language-identification", filename="model.bin")
+# Create the FastText classifier from the downloaded model file
+classifier = _FastText(model_path)
+def remove_label_prefix(item):
+    return item.replace('__label__', '')
+def remove_label_prefix_list(input_list):
+    if isinstance(input_list[0], list):
+        return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]
+    else:
+        return [remove_label_prefix(item) for item in input_list]
+# Class names are the classifier's labels with the __label__ prefix removed, sorted
+class_names = remove_label_prefix_list(classifier.labels)
+class_names = np.sort(class_names)
+# Number of classes; used as the k argument of classifier.predict
+num_class = len(class_names)
+def tokenize_string(string):
+    return string.split()
+# Build the LIME text explainer: tokens come from tokenize_string (whitespace
+# split) and the class names are the sorted label names computed above
+explainer = lime.lime_text.LimeTextExplainer(
+    split_expression=tokenize_string,
+    bow=False,
+    class_names=class_names
+)
+def fasttext_prediction_in_sklearn_format(classifier, texts):
+    res = []
+    labels, probabilities = classifier.predict(texts, num_class)
+    labels = remove_label_prefix_list(labels)
+    for label, probs, text in zip(labels, probabilities, texts):
+        order = np.argsort(np.array(label))
+        res.append(probs[order])
+    return np.array(res)
+def generate_explanation_html(input_sentence):
+    preprocessed_sentence = input_sentence
+    exp = explainer.explain_instance(
+        preprocessed_sentence,
+        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
+        top_labels=2,
+        num_features=20,
+    )
+    output_html_filename = "explanation.html"
+    exp.save_to_file(output_html_filename)
+    return output_html_filename
+def take_screenshot(local_html_path):
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--no-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+    try:
+        local_html_path = os.path.abspath(local_html_path)
+        wd = webdriver.Chrome(options=options)
+        wd.set_window_size(1366, 728)
+        wd.get('file://' + local_html_path)
+        wd.implicitly_wait(10)
+        screenshot = wd.get_screenshot_as_png()
+    except WebDriverException as e:
+        return Image.new('RGB', (1, 1))
+    finally:
+        if wd:
+            wd.quit()
+    return Image.open(BytesIO(screenshot))
+def merge(input_sentence):
+    input_sentence = input_sentence.replace('\n', ' ')
+    output_html_filename = generate_explanation_html(input_sentence)
+    im = take_screenshot(output_html_filename)
+    return im, output_html_filename
+# Gradio UI: one text input; outputs are the explanation image and the HTML file
+input_sentence = gr.inputs.Textbox(label="Input Sentence")
+output_explanation = gr.outputs.File(label="Explanation HTML")
+iface = gr.Interface(
+    fn=merge,
+    inputs=input_sentence,
+    outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
+    title="LIME LID",
+    description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
+    allow_flagging='never'
+)
+# Start the Gradio app
+iface.launch()