kargaranamir committed
Commit 6dabd3f
1 Parent(s): 7ec6a67

add new app.

Files changed (3)
  1. app.py +72 -29
  2. app_legacy.py → app_v0.py +0 -0
  3. app_v1.py +114 -0
app.py CHANGED
@@ -21,11 +21,35 @@ from selenium import webdriver
from selenium.common.exceptions import WebDriverException
import os

- # Load the FastText language identification model from Hugging Face Hub
- model_path = hf_hub_download(repo_id="facebook/fasttext-language-identification", filename="model.bin")

- # Create the FastText classifier
- classifier = _FastText(model_path)
+ # Define a dictionary to map model choices to their respective paths
+ model_paths = {
+     "LID201": ["kargaranamir/LID201", 'model.bin'],
+     "BIGLID": ["kargaranamir/BIGLID", 'model.bin'],
+     # "FT176": ["kargaranamir/FT176", 'model.bin'],
+     "NLLB": ["facebook/fasttext-language-identification", 'model.bin']
+ }
+
+ # Create a dictionary to cache classifiers
+ cached_classifiers = {}
+
+ def load_classifier(model_choice):
+     if model_choice in cached_classifiers:
+         return cached_classifiers[model_choice]
+
+     # Load the FastText language identification model from Hugging Face Hub
+     model_path = hf_hub_download(repo_id=model_paths[model_choice][0], filename=model_paths[model_choice][1])
+
+     # Create the FastText classifier
+     classifier = _FastText(model_path)
+
+     cached_classifiers[model_choice] = classifier
+     return classifier
+
+ # cache all models
+ for model_choice in model_paths.keys():
+     load_classifier(model_choice)
+

def remove_label_prefix(item):
    return item.replace('__label__', '')
@@ -36,20 +60,21 @@ def remove_label_prefix_list(input_list):
    else:
        return [remove_label_prefix(item) for item in input_list]

- class_names = remove_label_prefix_list(classifier.labels)
- class_names = np.sort(class_names)
- num_class = len(class_names)

- def tokenize_string(string):
-     return string.split()
+ def tokenize_string(sentence, n=None):
+     if n is None:
+         tokens = sentence.split()
+     else:
+         tokens = []
+         for i in range(len(sentence) - n + 1):
+             tokens.append(sentence[i:i + n])
+     return tokens

- explainer = lime.lime_text.LimeTextExplainer(
-     split_expression=tokenize_string,
-     bow=False,
-     class_names=class_names
- )

- def fasttext_prediction_in_sklearn_format(classifier, texts):
+ def fasttext_prediction_in_sklearn_format(classifier, texts, num_class):
+     # if isinstance(texts, str):
+     #     texts = [texts]
+
    res = []
    labels, probabilities = classifier.predict(texts, num_class)
    labels = remove_label_prefix_list(labels)
@@ -58,11 +83,12 @@ def fasttext_prediction_in_sklearn_format(classifier, texts):
        res.append(probs[order])
    return np.array(res)

- def generate_explanation_html(input_sentence):
+
+ def generate_explanation_html(input_sentence, explainer, classifier, num_class):
    preprocessed_sentence = input_sentence
    exp = explainer.explain_instance(
        preprocessed_sentence,
-         classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
+         classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x, num_class),
        top_labels=2,
        num_features=20,
    )
@@ -91,24 +117,41 @@ def take_screenshot(local_html_path):

    return Image.open(BytesIO(screenshot))

- def merge(input_sentence):
+
+ # Define the merge function
+ def merge_function(input_sentence, selected_model):
+
    input_sentence = input_sentence.replace('\n', ' ')
-     output_html_filename = generate_explanation_html(input_sentence)
+
+     # Load the FastText language identification (BIGLID) model from Hugging Face Hub
+     classifier = load_classifier(selected_model)
+     class_names = remove_label_prefix_list(classifier.labels)
+     class_names = np.sort(class_names)
+     num_class = len(class_names)
+
+     # Load Lime
+     explainer = lime.lime_text.LimeTextExplainer(
+         split_expression=tokenize_string,
+         bow=False,
+         class_names=class_names)
+
+     # Generate output
+     output_html_filename = generate_explanation_html(input_sentence, explainer, classifier, num_class)
    im = take_screenshot(output_html_filename)
-
    return im, output_html_filename

- input_sentence = gr.inputs.Textbox(label="Input Sentence")
+ # Define the Gradio interface
+ input_text = gr.inputs.Textbox(label="Input Text")
+ model_choice = gr.Radio(choices=["BIGLID", "LID201", "NLLB"], label="Select Model", value='BIGLID')

output_explanation = gr.outputs.File(label="Explanation HTML")

- iface = gr.Interface(
-     fn=merge,
-     inputs=input_sentence,
-     outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
-     title="LIME LID",
-     description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
-     allow_flagging='never'
- )
+
+ iface = gr.Interface(merge_function,
+                      inputs=[input_text, model_choice],
+                      outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
+                      title="LIME LID",
+                      description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
+                      allow_flagging='never')

iface.launch()
app_legacy.py → app_v0.py RENAMED
File without changes
app_v1.py ADDED
@@ -0,0 +1,114 @@
+ # """
+ # Author: Amir Hossein Kargaran
+ # Date: August, 2023
+
+ # Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.
+
+ # MIT License
+
+ # Some part of the code is adopted from here: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
+ # """
+
+ import gradio as gr
+ from io import BytesIO
+ from fasttext.FastText import _FastText
+ import re
+ import lime.lime_text
+ import numpy as np
+ from PIL import Image
+ from huggingface_hub import hf_hub_download
+ from selenium import webdriver
+ from selenium.common.exceptions import WebDriverException
+ import os
+
+ # Load the FastText language identification model from Hugging Face Hub
+ model_path = hf_hub_download(repo_id="facebook/fasttext-language-identification", filename="model.bin")
+
+ # Create the FastText classifier
+ classifier = _FastText(model_path)
+
+ def remove_label_prefix(item):
+     return item.replace('__label__', '')
+
+ def remove_label_prefix_list(input_list):
+     if isinstance(input_list[0], list):
+         return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]
+     else:
+         return [remove_label_prefix(item) for item in input_list]
+
+ class_names = remove_label_prefix_list(classifier.labels)
+ class_names = np.sort(class_names)
+ num_class = len(class_names)
+
+ def tokenize_string(string):
+     return string.split()
+
+ explainer = lime.lime_text.LimeTextExplainer(
+     split_expression=tokenize_string,
+     bow=False,
+     class_names=class_names
+ )
+
+ def fasttext_prediction_in_sklearn_format(classifier, texts):
+     res = []
+     labels, probabilities = classifier.predict(texts, num_class)
+     labels = remove_label_prefix_list(labels)
+     for label, probs, text in zip(labels, probabilities, texts):
+         order = np.argsort(np.array(label))
+         res.append(probs[order])
+     return np.array(res)
+
+ def generate_explanation_html(input_sentence):
+     preprocessed_sentence = input_sentence
+     exp = explainer.explain_instance(
+         preprocessed_sentence,
+         classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
+         top_labels=2,
+         num_features=20,
+     )
+     output_html_filename = "explanation.html"
+     exp.save_to_file(output_html_filename)
+     return output_html_filename
+
+ def take_screenshot(local_html_path):
+     options = webdriver.ChromeOptions()
+     options.add_argument('--headless')
+     options.add_argument('--no-sandbox')
+     options.add_argument('--disable-dev-shm-usage')
+
+     try:
+         local_html_path = os.path.abspath(local_html_path)
+         wd = webdriver.Chrome(options=options)
+         wd.set_window_size(1366, 728)
+         wd.get('file://' + local_html_path)
+         wd.implicitly_wait(10)
+         screenshot = wd.get_screenshot_as_png()
+     except WebDriverException as e:
+         return Image.new('RGB', (1, 1))
+     finally:
+         if wd:
+             wd.quit()
+
+     return Image.open(BytesIO(screenshot))
+
+ def merge(input_sentence):
+     input_sentence = input_sentence.replace('\n', ' ')
+     output_html_filename = generate_explanation_html(input_sentence)
+     im = take_screenshot(output_html_filename)
+
+     return im, output_html_filename
+
+ input_sentence = gr.inputs.Textbox(label="Input Sentence")
+
+ output_explanation = gr.outputs.File(label="Explanation HTML")
+
+ iface = gr.Interface(
+     fn=merge,
+     inputs=input_sentence,
+     outputs=[gr.Image(type="pil", height=364, width=683, label = "Explanation Image"), output_explanation],
+     title="LIME LID",
+     description="This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.",
+     allow_flagging='never'
+ )
+
+ iface.launch()