kargaranamir committed on
Commit
41c3c5a
‒
1 Parent(s): 81771f9
Files changed (3) hide show
  1. README.md +5 -5
  2. app.py +113 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: LangID LIME
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
  app_file: app.py
@@ -10,4 +10,4 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: LangID-LIME
3
+ emoji: 🍋
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
  app_file: app.py
 
10
  license: mit
11
  ---
12
 
13
+ This code applies LIME (Local Interpretable Model-Agnostic Explanations) to fastText language identification.
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # """
2
+ # Author: Amir Hossein Kargaran
3
+ # Date: August, 2023
4
+
5
+ # Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) to fastText language identification.
6
+
7
+ # MIT License
8
+
9
+ # Parts of the code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
10
+ # """
11
+
12
+ import gradio as gr
13
+ from io import BytesIO
14
+ import base64
15
+ from fasttext.FastText import _FastText
16
+ import re
17
+ import lime.lime_text
18
+ import numpy as np
19
+ from pathlib import Path
20
+ from huggingface_hub import hf_hub_download
21
+
22
# Fetch the FastText language identification model file from the
# Hugging Face Hub; the returned value is used as the model path below.
model_path = hf_hub_download(
    filename="model.bin",
    repo_id="facebook/fasttext-language-identification",
)

# Build the FastText classifier from the downloaded model file
classifier = _FastText(model_path)
27
+
28
def remove_label_prefix(item):
    """
    Remove the leading '__label__' marker from a single label string.

    Only a true prefix is stripped: an occurrence of '__label__' elsewhere
    in the string is left untouched, and a string that does not start with
    the marker is returned unchanged.
    """
    prefix = '__label__'
    if item.startswith(prefix):
        return item[len(prefix):]
    return item
33
+
34
def remove_label_prefix_list(input_list):
    """
    Remove the label prefix from a flat sequence of labels, or from a
    sequence of label sequences.

    Returns a list of stripped labels for flat input and a list of lists
    for nested input. An empty input yields an empty list.
    """
    # Nothing to strip; also avoids an IndexError when peeking at element 0
    if len(input_list) == 0:
        return []

    # The first element decides whether the input is nested. Tuples are
    # accepted alongside lists for the inner sequences.
    if isinstance(input_list[0], (list, tuple)):
        return [
            [remove_label_prefix(item) for item in inner_list]
            for inner_list in input_list
        ]

    # Otherwise, it's a simple flat sequence of labels
    return [remove_label_prefix(item) for item in input_list]
44
+
45
+
46
# Class names are the classifier's labels with the label prefix stripped,
# kept in sorted order; num_class is how many of them there are.
class_names = np.sort(remove_label_prefix_list(classifier.labels))
num_class = len(class_names)
50
+
51
+
52
def tokenize_string(string):
    """
    Break the string into words on runs of whitespace, similar to
    FastText's method.
    """
    words = string.split(sep=None)
    return words
57
+
58
# LIME text explainer: text is split with tokenize_string and the classes
# are named after the sorted labels (see the LIME docs for the bow flag).
explainer = lime.lime_text.LimeTextExplainer(
    class_names=class_names,
    bow=False,
    split_expression=tokenize_string,
)
63
+
64
def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    Args:
        classifier: FastText model exposing ``predict(texts, k)``.
        texts: list of input strings to classify.

    Returns:
        A numpy array with one row per text. Each row holds that text's
        probabilities reordered so the corresponding labels are in sorted
        order, which is the ordering used for ``class_names``.
    """
    # FastText predicts one line at a time and raises ValueError on text
    # containing a newline, so newlines are turned into plain spaces first.
    single_line_texts = [text.replace('\n', ' ') for text in texts]

    labels, probabilities = classifier.predict(single_line_texts, num_class)

    # Remove label prefix
    labels = remove_label_prefix_list(labels)

    # Predictions come back ranked by probability; argsort over the label
    # names puts every row into the same sorted-label column order.
    return np.array([
        np.asarray(probs)[np.argsort(np.array(label))]
        for label, probs in zip(labels, probabilities)
    ])
79
+
80
def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    Args:
        input_sentence: the text to explain.

    Returns:
        Path of the HTML file the explanation was saved to.
    """
    import os
    import tempfile

    exp = explainer.explain_instance(
        input_sentence,
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )

    # Each call writes into its own fresh directory so concurrent requests
    # cannot overwrite each other's report. The file itself keeps the name
    # "explanation.html".
    output_dir = tempfile.mkdtemp(prefix="lime_explanation_")
    output_html_filename = os.path.join(output_dir, "explanation.html")
    exp.save_to_file(output_html_filename)

    return output_html_filename
96
+
97
def download_html_file(html_filename):
    """
    Read the given HTML file in binary mode and return its raw bytes.
    """
    with open(html_filename, mode="rb") as html_file:
        return html_file.read()
104
+
105
# Components come from the top-level gradio namespace: the gr.inputs /
# gr.outputs namespaces are deprecated and absent from newer Gradio
# releases, which the unpinned requirement may install.
input_sentence = gr.Textbox(label="Input Sentence")  # Change the label if needed
output_explanation = gr.File(label="Download Explanation HTML")

# Build the web UI around generate_explanation_html and start serving it
gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never'
).launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fasttext>=0.9.2
2
+ lime>=0.2.0,<0.3.0
3
+ huggingface-hub>=0.14.1
4
+ numpy>=1.24.3
5
+ gradio>=3.40.1