jb2k committed on
Commit
48024e7
1 Parent(s): 38519b2

Uploaded initial app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
2
+ import torch
3
+ import gradio as gr
4
+
5
# Checkpoint identifier handed to from_pretrained() for both the tokenizer
# and the classification model, so the two always come from the same source.
model_path = "jb2k/bert-base-multilingual-cased-language-detection"

# Loaded once at import time and reused by every call to inference().
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
9
+
10
# Maps a predicted class index (the argmax over the model's output scores in
# inference()) to the language name that is shown to the user.
language_dict = {
    0: 'Arabic',
    1: 'Basque',
    2: 'Breton',
    3: 'Catalan',
    4: 'Chinese_China',
    5: 'Chinese_Hongkong',
    6: 'Chinese_Taiwan',
    7: 'Chuvash',
    8: 'Czech',
    9: 'Dhivehi',
    10: 'Dutch',
    11: 'English',
    12: 'Esperanto',
    13: 'Estonian',
    14: 'French',
    15: 'Frisian',
    16: 'Georgian',
    17: 'German',
    18: 'Greek',
    19: 'Hakha_Chin',
    20: 'Indonesian',
    21: 'Interlingua',
    22: 'Italian',
    23: 'Japanese',
    24: 'Kabyle',
    25: 'Kinyarwanda',
    26: 'Kyrgyz',
    27: 'Latvian',
    28: 'Maltese',
    29: 'Mongolian',
    30: 'Persian',
    31: 'Polish',
    32: 'Portuguese',
    33: 'Romanian',
    34: 'Romansh_Sursilvan',
    35: 'Russian',
    36: 'Sakha',
    37: 'Slovenian',
    38: 'Spanish',
    39: 'Swedish',
    40: 'Tamil',
    41: 'Tatar',
    42: 'Turkish',
    43: 'Ukrainian',  # spelling corrected from 'Ukranian'
    44: 'Welsh',
}
55
+
56
def inference(sentence):
    """Return the name of the language predicted for ``sentence``.

    The text is tokenized with the module-level ``tokenizer``, scored by the
    module-level ``model``, and the highest-scoring class index is looked up
    in ``language_dict``.

    Args:
        sentence: The text to classify. Expected to be a single string;
            the result is read with ``.item()``, which only works when the
            model produces exactly one prediction.

    Returns:
        The language name (a value of ``language_dict``) for the top class.

    Raises:
        KeyError: If the predicted class index is not a key of
            ``language_dict``.
    """
    # truncation=True caps the input at the tokenizer's configured maximum
    # length instead of passing over-long text to the model at full length.
    tokenized_sentence = tokenizer(sentence, return_tensors='pt', truncation=True)

    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**tokenized_sentence)

    # Softmax is monotonic, so the argmax of the raw logits selects the same
    # class as the argmax of the probabilities; the probability itself was
    # never used by the caller.
    predicted_index = output.logits.argmax(dim=-1).item()
    return language_dict[predicted_index]
65
+
66
if __name__ == '__main__':
    # Build the input and output widgets first, then wire them to inference().
    text_input = gr.inputs.Textbox(
        lines=5,
        placeholder="Enter text here",
        label="Text content",
    )
    language_output = gr.outputs.Label(
        num_top_classes=6,
        label="Language of this text is ",
    )

    demo = gr.Interface(
        fn=inference,
        inputs=text_input,
        outputs=language_output,
        title="Language of a sequence of text",
        description="",
        theme="grass",
        verbose=True,
    )
    # Start the Gradio app with its default launch settings.
    demo.launch()
75
+