Shanks0465 committed on
Commit
4a7a1cb
1 Parent(s): dca1876

Updated to Interface Application

Browse files
Files changed (1) hide show
  1. app.py +78 -63
app.py CHANGED
@@ -1,83 +1,98 @@
1
- # -*- coding: utf-8 -*-
2
-
3
  import gradio as gr
4
- import transformers
5
- from transformers import MBartForConditionalGeneration, AutoModelForSeq2SeqLM
6
- from transformers import AlbertTokenizer, AutoTokenizer
7
-
8
- tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/MultiIndicWikiBioSS", do_lower_case=False, use_fast=False, keep_accents=True)
9
-
10
- # Or use tokenizer = AlbertTokenizer.from_pretrained("ai4bharat/IndicBART-XLSum", do_lower_case=False, use_fast=False, keep_accents=True)
11
-
12
- # xlsummodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART-XLSum")
13
- qgmodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicQuestionGenerationSS").eval()
14
- hgmodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicHeadlineGenerationSS").eval()
15
- ssmodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicSentenceSummarizationSS").eval()
16
- ppmodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicParaphraseGenerationSS").eval()
17
- wbmodel = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/MultiIndicWikiBioSS").eval()
 
 
18
 
19
- # Some initial mapping
20
  bos_id = tokenizer._convert_token_to_id_with_added_voc("<s>")
21
  eos_id = tokenizer._convert_token_to_id_with_added_voc("</s>")
22
  pad_id = tokenizer._convert_token_to_id_with_added_voc("<pad>")
23
- # To get lang_id use any of ['<2bn>', '<2gu>', '<2hi>', '<2mr>', '<2pa>', '<2ta>', '<2te>']
24
 
25
 
26
-
27
- def greet(input, task, lang):
28
  if task == "IndicWikiBio":
29
  model = wbmodel
30
  elif task == "IndicHeadlineGeneration":
31
  model = hgmodel
32
- elif task == "IndicParaphrasing":
33
  model = ppmodel
34
  elif task == "IndicSentenceSummarization":
35
  model = ssmodel
36
- elif task == "IndicQuestionGeneration":
37
  model = qgmodel
38
 
39
-
40
- inp = tokenizer(input.strip() + " </s> <2" + lang + ">", add_special_tokens=False, return_tensors="pt", padding=True).input_ids
41
- model_output=model.generate(inp, use_cache=True, num_beams=1, max_length=100, min_length=1, early_stopping=True, pad_token_id=pad_id, bos_token_id=bos_id, eos_token_id=eos_id, decoder_start_token_id=tokenizer._convert_token_to_id_with_added_voc("<2"+lang+">"))
42
-
43
-
44
- # Decode to get output strings
45
-
46
- decoded_output=tokenizer.decode(model_output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
47
 
48
  return decoded_output
49
- # return "Hi"
50
 
51
- def set_example(task, lang):
52
- # print(task)
53
- if task == "IndicWikiBio":
54
- return gr.update(value=u"<TAG> name </TAG> राम नरेश पांडेय <TAG> office </TAG> विधायक - 205 - कुशीनगर विधान सभा निर्वाचन क्षेत्र , उत्तर प्रदेश <TAG> term </TAG> 1967 से 1968 <TAG> nationality </TAG> भारतीय"), gr.update(value="hi")
55
- elif task == "IndicHeadlineGeneration":
56
- return gr.update(value=u"वैश्विक व्यापार युद्ध की शिकार हुई तुर्की की मुद्रा लीरा के डूबने से अमेरिकी डॉलर के मुकाबले रुपया अब तक के न्यूनतम स्तर पर पहुंच गया। रुपये में रिकॉर्ड गिरावट से सोने की चमक में निखार नहीं आ सकी। वैश्विक बाजार में सोना करीब आठ महीने के निचले स्तर पर पहुंच गया तो घरेलू बाजार में यह करीब नौ महीने के निचले स्तर पर चला गया। वैश्विक मंदी की आशंका से वैश्विक बाजार में चांदी करीब ढाई साल और घरेलू बाजार में तकरीबन नौ महीने के निचले स्तर पर पहुंच गई। तुर्की की आर्थिक चिंता के कारण अमेरिकी डॉलर के मुकाबले रुपया कारोबार के दौरान 70.80 के स्तर तक गिर गया। यह इसका ऐतिहासिक रिकॉर्ड निम्न स्तर है। कमजोर रुपये से सोने की चमक बढऩे की उम्मीद की जा रही थी लेकिन वैश्विक बाजार में सोने की कीमत गिरकर 1,193.50 डॉलर प्रति औंस पहुंचने के कारण घरेलू बाजार में भी सोने की चमक फीकी पड़ गई। घरेलू बाजार में सोना गिरकर 29,655 रुपये प्रति 10 ग्राम पहुंच गया। घरेलू वायदा बाजार यानी एमसीएक्स पर सोना 29,700 के आस-पास कारोबार कर रहा है। देश में इस साल सोने की मांग में लगातार गिरावट देखने को मिल रही थी। अप्रैल-जून तिमाही में सोने का आयात 25 फीसदी से भी कम हुआ है। चालू महीने में सोने की मांग बढऩे की उम्मीद जगी थी लेकिन यह उम्मीद टूट सकती है क्योंकि दुनिया के सबसे बड़े गोल्ड फंड एसपीडीआर गोल्ड की होल्डिंग अप्रैल के बाद 10 फीसदी गिर चुकी है। इस समय यह पिछले ढाई साल के निचले स्तर पर है। इस साल वैश्विक बाजार में सोना करीब 8.5 फीसदी और घरेलू बाजार में 1.5 फीसदी टूट चुका है। सराफा मामलों के जानकार अनिल अग्रवाल कहते हैं कि वैश्विक हालात ऐसे हैं कि इस समय निवेशक डॉलर में पैसा लगा रहे हैं। इस कारण दूसरी मुद्रा और जिंस दबाव में हैं। हालांकि हालात यही रहे तो सोने में तेज सुधार भी देखने को मिलेगा। वैश्विक मंदी की बढ़ती आशंका का सबसे ज्यादा असर चांदी पर पड़ रहा है। वैश्विक बाजार में चांदी के दाम ढाई साल के निचले स्तर पर पहुंच चुके हैं। वैश्विक बाजार में चांदी की कीमत 15 डॉलर प्रति औंस के करीब चल रही है। इसके पहले अप्रैल 2016 में चांदी इस स्तर पर थी। वैश्विक बाजार में चांदी के दाम दो महीने पहले 18.13 डॉलर प्रति औंस पर चल रहे थे। चांदी कारोबारी राहुल मेहता कहते हैं कि सोना और मूल धातु में 
कमजोरी से चांदी पर दोहरा दबाव पड़ रहा है। वैश्विक बाजार का व्यापार युद्ध अब मुद्रा युद्ध में बदल गया है। वैश्विक अर्थव्यवस्था एक बार फिर मंदी की गिरफ्त में आ सकती है जिसके कारण औद्योगिक विकास भी प्रभावित होगा। यही वजह है कि चांदी की कीमतें लगातार लुढक़ रही हैं क्योंकि मांग में कमी आने की आशंका बढ़ती जा रही है। फिलहाल घरेलू बाजार में चांदी 37,825 रुपये प्रति किलोग्राम पर बिक रही है। तुर्की के आर्थिक संकट से एक बार फिर वैश्विक मंदी का डर है जिसका असर दुनियाभर के बाजारों पर देखा जा सकता है। इसने विश्व स्तर पर निवेशकों के रुख को प्रभावित किया है और वे डॉलर को एक सुरक्षित निवेश के तौर पर देख रहे हैं। आनंद राठी शेयर्स ऐंड स्टाक ब्रोकर्स में शोध विश्लेषक आर मारू ने कहा कि आयातकों की अधिक मांग से रुपये की विनिमय दर में गिरावट आई। उन्होंने कहा, तुर्की संकट को लेकर अनिश्चितता तथा डॉलर सूचकांक में तेजी को देखते हुए आयातक आक्रमक तरीके से डॉलर की लिवाली कर रहे हैं। दूसरी तरफ आरबीआई की तरफ से आक्रमक हस्तक्षेप न होने से भी रुपया नीचे आया। सरकार ने अमेरिकी डॉलर के मुकाबले रुपये के अब तक के न्यूनतम स्तर पर पहुंचने के लिए बाह्य कारकों को जिम्मेदार ठहराते हुए कहा कि इसमें चिंता की कोई बात नहीं है।"), gr.update(value="hi")
57
- elif task == "IndicParaphrasing":
58
- return gr.update(value=u"दिल्ली यूनिवर्सिटी देश की प्रसिद्ध यूनिवर्सिटी में से एक है."), gr.update(value="hi")
59
- elif task == "IndicSentenceSummarization":
60
- return gr.update(value=u"जम्मू एवं कश्मीर के अनंतनाग जिले में शनिवार को सुरक्षाबलों के साथ मुठभेड़ में दो आतंकवादियों को मार गिराया गया।"), gr.update(value="hi")
61
- else:
62
- return gr.update(value=u"7 फरवरी, 2016 [SEP] खेल 7 फरवरी, 2016 को कैलिफोर्निया के सांता क्लारा में सैन फ्रांसिस्को खाड़ी क्षेत्र में लेवी स्टेडियम में खेला गया था।"), gr.update(value="hi")
63
-
64
 
65
- with gr.blocks.Blocks() as block:
66
- input= gr.Textbox(label="Input")
67
- task = gr.Dropdown(["IndicWikiBio", "IndicHeadlineGeneration", "IndicParaphrasing", "IndicSentenceSummarization", "IndicQuestionGeneration"], label="Task")
68
- lang = gr.Dropdown(["as","bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"], label="Language")
69
- generate = gr.Button("Generate")
70
- output = gr.Textbox()
71
- instructions = gr.HTML("<h1>How to use:</h1><br>\
72
- 1. This space supports 5 tasks and 11 Indic languages.<br>\
73
- 2. First select the task from the dropdown box and it will show you an example of Input for Hindi. This default example display will be automated for each language in the future. Choose your language, give your input and then press the generate button. Note the formats for IndicWikiBio and Question generation when testing your own inputs. Also note that if you choose another task then the input will be replaced with the default example for that task.<br>\
74
- 3. The tasks are:<br>\
75
- 3.1 IndicWikiBio where the input is a Wikipedia table and the output will be a one sentence biograpy. You should pass the input in the following format: &lt;TAG&gt; key1 &lt;/TAG&gt; value1 &lt;TAG&gt; key2 &lt;/TAG&gt; value2.<br>\
76
- 3.2 IndicHeadlineGeneration where the input is a document or paragraph the output will be a short title. Copy a paragraph from your favorite news site and get a headline. Dont paste extemely long paragraphs. You have been warned.<br>\
77
- 3.3 IndicParaphrasing where the input is a sentence and the output is its paraphrase.<br>\
78
- 3.4 IndicSentenceSummarization where the input is a long sentence and the output is a compact version of that sentence.<br>\
79
- 3.5 IndicQuestionGeneration where the input is an answer and context and the output is the question that should be asked to get the answer. You should pass the input in the following format: ANSWER [SEP] CONTEXT.\
80
- ")
81
- task.change(set_example, inputs=[task, lang], outputs=[input, lang])
82
- generate.click(greet, inputs = [input, task, lang], outputs=output)
83
- block.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForSeq2SeqLM
3
+ from transformers import AlbertTokenizer
4
+
5
+
6
def _load_eval_model(checkpoint):
    """Load a seq2seq checkpoint and put it in evaluation mode."""
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return model.eval()


# A single tokenizer, loaded from the WikiBio checkpoint, is created here.
tokenizer = AlbertTokenizer.from_pretrained(
    "ai4bharat/MultiIndicWikiBioSS",
    do_lower_case=False,
    use_fast=False,
    keep_accents=True,
)

# One model per task, loaded once at start-up.
qgmodel = _load_eval_model("ai4bharat/MultiIndicQuestionGenerationSS")
hgmodel = _load_eval_model("ai4bharat/MultiIndicHeadlineGenerationSS")
ssmodel = _load_eval_model("ai4bharat/MultiIndicSentenceSummarizationSS")
ppmodel = _load_eval_model("ai4bharat/MultiIndicParaphraseGenerationSS")
wbmodel = _load_eval_model("ai4bharat/MultiIndicWikiBioSS")

# Vocabulary ids of the special tokens.
bos_id, eos_id, pad_id = (
    tokenizer._convert_token_to_id_with_added_voc(token)
    for token in ("<s>", "</s>", "<pad>")
)
 
22
 
23
 
24
def generate(input, task, lang):
    """Run the model selected by *task* on *input* and return the decoded text.

    Args:
        input: Source text. Surrounding whitespace is stripped before the
            ``</s>`` marker and the language tag are appended.
        task: One of "IndicWikiBio", "IndicHeadlineGeneration",
            "IndicParaphrasing", "IndicSentenceSummarization" or
            "IndicQuestionGeneration".
        lang: Target language, either a display name that is a key of
            ``INDIC`` (e.g. "Hindi") or a language code (e.g. "hi").

    Returns:
        The first generated sequence, decoded with special tokens skipped.

    Raises:
        ValueError: If *task* is not one of the supported task names.
    """
    models_by_task = {
        "IndicWikiBio": wbmodel,
        "IndicHeadlineGeneration": hgmodel,
        "IndicParaphrasing": ppmodel,
        "IndicSentenceSummarization": ssmodel,
        "IndicQuestionGeneration": qgmodel,
    }
    try:
        model = models_by_task[task]
    except KeyError:
        # Previously an unknown task left `model` unbound and failed later
        # with an unrelated-looking UnboundLocalError.
        raise ValueError(f"Unsupported task: {task!r}") from None

    # The language dropdown supplies display names ("Hindi"), but the
    # language tag is built from the code ("<2hi>"). Map the name through
    # INDIC; values that are not display names are passed through unchanged
    # so callers that already send a code keep working.
    lang_tag = "<2" + INDIC.get(lang, lang) + ">"
    lang_tag_id = tokenizer._convert_token_to_id_with_added_voc(lang_tag)

    inp = tokenizer(
        input.strip() + " </s> " + lang_tag,
        add_special_tokens=False,
        return_tensors="pt",
        padding=True,
    ).input_ids

    model_output = model.generate(
        inp,
        use_cache=True,
        num_beams=1,
        max_length=100,
        min_length=1,
        early_stopping=True,
        pad_token_id=pad_id,
        bos_token_id=bos_id,
        eos_token_id=eos_id,
        # Decoding starts from the target-language tag.
        decoder_start_token_id=lang_tag_id,
    )

    decoded_output = tokenizer.decode(
        model_output[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded_output
 
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
# Task names offered in the task dropdown.
tasks = [
    "IndicWikiBio",
    "IndicHeadlineGeneration",
    "IndicParaphrasing",
    "IndicSentenceSummarization",
    "IndicQuestionGeneration",
]

# Language display name -> language code.
INDIC = {
    "Assamese": "as",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Hindi": "hi",
    "Kannada": "kn",
    "Malayalam": "ml",
    "Marathi": "mr",
    "Odia": "or",
    "Punjabi": "pa",
    "Tamil": "ta",
    "Telugu": "te",
}
languages = list(INDIC)

# Input widgets: target language, task, and the source text.
language_drop_down = gr.inputs.Dropdown(
    languages,
    type="value",
    default="Hindi",
    label="Select Target Language",
)
task_drop_down = gr.inputs.Dropdown(
    tasks,
    type="value",
    default="IndicSentenceSummarization",
    label="Select Task",
)
text = gr.inputs.Textbox(
    lines=5,
    placeholder="Enter Indic Text here...",
    default="",
    label="Enter Text in corresponding Indic Language",
)

# Output widget showing the generated text.
text_ouptut = gr.outputs.Textbox(
    type="auto",
    label="View Generated Text in the corresponding Indic Language",
)

# Comma-separated list of language names.
supported_lang = ", ".join(languages)
62
+
63
# One example row per task: [input text, task name, language name].
# IndicWikiBio input uses "<TAG> key </TAG> value" pairs and
# IndicQuestionGeneration input uses "ANSWER [SEP] CONTEXT".
examples = [
    [
        "<TAG> name </TAG> राम नरेश पांडेय <TAG> office </TAG> विधायक - 205 - कुशीनगर विधान सभा निर्वाचन क्षेत्र , उत्तर प्रदेश <TAG> term </TAG> 1967 से 1968 <TAG> nationality </TAG> भारतीय",
        "IndicWikiBio",
        "Hindi",
    ],
    [
        "वैश्विक व्यापार युद्ध की शिकार हुई तुर्की की मुद्रा लीरा के डूबने से अमेरिकी डॉलर के मुकाबले रुपया अब तक के न्यूनतम स्तर पर पहुंच गया। रुपये में रिकॉर्ड गिरावट से सोने की चमक में निखार नहीं आ सकी। वैश्विक बाजार में सोना करीब आठ महीने के निचले स्तर पर पहुंच गया तो घरेलू बाजार में यह करीब नौ महीने के निचले स्तर पर चला गया। वैश्विक मंदी की आशंका से वैश्विक बाजार में चांदी करीब ढाई साल और घरेलू बाजार में तकरीबन नौ महीने के निचले स्तर पर पहुंच गई। तुर्की की आर्थिक चिंता के कारण अमेरिकी डॉलर के मुकाबले रुपया कारोबार के दौरान 70.80 के स्तर तक गिर गया। यह इसका ऐतिहासिक रिकॉर्ड निम्न स्तर है। कमजोर रुपये से सोने की चमक बढऩे की उम्मीद की जा रही थी लेकिन वैश्विक बाजार में सोने की कीमत गिरकर 1,193.50 डॉलर प्रति औंस पहुंचने के कारण घरेलू बाजार में भी सोने की चमक फीकी पड़ गई। घरेलू बाजार में सोना गिरकर 29,655 रुपये प्रति 10 ग्राम पहुंच गया। घरेलू वायदा बाजार यानी एमसीएक्स पर सोना 29,700 के आस-पास कारोबार कर रहा है। देश में इस साल सोने की मांग में लगातार गिरावट देखने को मिल रही थी। अप्रैल-जून तिमाही में सोने का आयात 25 फीसदी से भी कम हुआ है। चालू महीने में सोने की मांग बढऩे की उम्मीद जगी थी लेकिन यह उम्मीद टूट सकती है क्योंकि दुनिया के सबसे बड़े गोल्ड फंड एसपीडीआर गोल्ड की होल्डिंग अप्रैल के बाद 10 फीसदी गिर चुकी है। इस समय यह पिछले ढाई साल के निचले स्तर पर है। इस साल वैश्विक बाजार में सोना करीब 8.5 फीसदी और घरेलू बाजार में 1.5 फीसदी टूट चुका है। सराफा मामलों के जानकार अनिल अग्रवाल कहते हैं कि वैश्विक हालात ऐसे हैं कि इस समय निवेशक डॉलर में पैसा लगा रहे हैं। इस कारण दूसरी मुद्रा और जिंस दबाव में हैं। हालांकि हालात यही रहे तो सोने में तेज सुधार भी देखने को मिलेगा। वैश्विक मंदी की बढ़ती आशंका का सबसे ज्यादा असर चांदी पर पड़ रहा है। वैश्विक बाजार में चांदी के दाम ढाई साल के निचले स्तर पर पहुंच चुके हैं। वैश्विक बाजार में चांदी की कीमत 15 डॉलर प्रति औंस के करीब चल रही है। इसके पहले अप्रैल 2016 में चांदी इस स्तर पर थी। वैश्विक बाजार में चांदी के दाम दो महीने पहले 18.13 डॉलर प्रति औंस पर चल रहे थे। चांदी कारोबारी राहुल मेहता कहते हैं कि सोना और मूल धातु में कमजोरी से चांदी पर दोहरा दबाव पड़ रहा है। वैश्विक बाजार का व्यापार युद्ध अब मुद्रा युद्ध में बदल गया है। वैश्विक अर्थव्यवस्था एक बार फिर मंदी की गिरफ्त में आ सकती है जिसके कारण औद्योगिक विकास भी प्रभावित होगा। यही वजह है कि चांदी की कीमतें लगातार लुढक़ रही हैं क्योंकि मांग में कमी आने की आशंका बढ़ती जा रही है। फिलहाल घरेलू बाजार में चांदी 37,825 रुपये प्रति किलोग्राम पर बिक रही है। तुर्की के आर्थिक संकट से एक बार फिर वैश्विक मंदी का डर है जिसका असर दुनियाभर के बाजारों पर देखा जा सकता है। इसने विश्व स्तर पर निवेशकों के रुख को प्रभावित किया है और वे डॉलर को एक सुरक्षित निवेश के तौर पर देख रहे हैं। आनंद राठी शेयर्स ऐंड स्टाक ब्रोकर्स में शोध विश्लेषक आर मारू ने कहा कि आयातकों की अधिक मांग से रुपये की विनिमय दर में गिरावट आई। उन्होंने कहा, तुर्की संकट को लेकर अनिश्चितता तथा डॉलर सूचकांक में तेजी को देखते हुए आयातक आक्रमक तरीके से डॉलर की लिवाली कर रहे हैं। दूसरी तरफ आरबीआई की तरफ से आक्रमक हस्तक्षेप न होने से भी रुपया नीचे आया। सरकार ने अमेरिकी डॉलर के मुकाबले रुपये के अब तक के न्यूनतम स्तर पर पहुंचने के लिए बाह्य कारकों को जिम्मेदार ठहराते हुए कहा कि इसमें चिंता की कोई बात नहीं है।",
        "IndicHeadlineGeneration",
        "Hindi",
    ],
    [
        "दिल्ली यूनिवर्सिटी देश की प्रसिद्ध यूनिवर्सिटी में से एक है.",
        "IndicParaphrasing",
        "Hindi",
    ],
    [
        "जम्मू एवं कश्मीर के अनंतनाग जिले में शनिवार को सुरक्षाबलों के साथ मुठभेड़ में दो आतंकवादियों को मार गिराया गया।",
        "IndicSentenceSummarization",
        "Hindi",
    ],
    [
        "7 फरवरी, 2016 [SEP] खेल 7 फरवरी, 2016 को कैलिफोर्निया के सांता क्लारा में सैन फ्रांसिस्को खाड़ी क्षेत्र में लेवी स्टेडियम में खेला गया था।",
        "IndicQuestionGeneration",
        "Hindi",
    ],
]
73
+
74
# Build and launch the demo UI around `generate`.
# The title and article previously described an "IndicTrans NMT System" and
# linked to the indicTrans repository, but none of the five tasks is
# translation. The article also restores the input-format notes that users
# need for the WikiBio and question-generation tasks.
iface = gr.Interface(
    fn=generate,
    inputs=[text, task_drop_down, language_drop_down],
    outputs=text_ouptut,
    title="Indic Natural Language Generation Demo",
    description="Currently the model supports " + supported_lang,
    article=(
        "The models are published by "
        "[AI4Bharat on Hugging Face](https://huggingface.co/ai4bharat).\n\n"
        "Input formats: IndicWikiBio expects "
        "`<TAG> key1 </TAG> value1 <TAG> key2 </TAG> value2`; "
        "IndicQuestionGeneration expects `ANSWER [SEP] CONTEXT`."
    ),
    examples=examples,
)
iface.launch(enable_queue=True)
77
+
78
+ # with gr.blocks.Blocks() as block:
79
+ # input = gr.Textbox(label="Input")
80
+ # task = gr.Dropdown(["IndicWikiBio", "IndicHeadlineGeneration", "IndicParaphrasing",
81
+ # "IndicSentenceSummarization", "IndicQuestionGeneration"], label="Task")
82
+ # lang = gr.Dropdown(["as", "bn", "gu", "hi", "kn", "ml",
83
+ # "mr", "or", "pa", "ta", "te"], label="Language")
84
+ # generate = gr.Button("Generate")
85
+ # output = gr.Textbox()
86
+ # instructions = gr.HTML("<h1>How to use:</h1><br>\
87
+ # 1. This space supports 5 tasks and 11 Indic languages.<br>\
88
+ # 2. First select the task from the dropdown box and it will show you an example of Input for Hindi. This default example display will be automated for each language in the future. Choose your language, give your input and then press the generate button. Note the formats for IndicWikiBio and Question generation when testing your own inputs. Also note that if you choose another task then the input will be replaced with the default example for that task.<br>\
89
+ # 3. The tasks are:<br>\
90
+ # 3.1 IndicWikiBio where the input is a Wikipedia table and the output will be a one-sentence biography. You should pass the input in the following format: &lt;TAG&gt; key1 &lt;/TAG&gt; value1 &lt;TAG&gt; key2 &lt;/TAG&gt; value2.<br>\
91
+ # 3.2 IndicHeadlineGeneration where the input is a document or paragraph and the output will be a short title. Copy a paragraph from your favorite news site and get a headline. Don't paste extremely long paragraphs. You have been warned.<br>\
92
+ # 3.3 IndicParaphrasing where the input is a sentence and the output is its paraphrase.<br>\
93
+ # 3.4 IndicSentenceSummarization where the input is a long sentence and the output is a compact version of that sentence.<br>\
94
+ # 3.5 IndicQuestionGeneration where the input is an answer and context and the output is the question that should be asked to get the answer. You should pass the input in the following format: ANSWER [SEP] CONTEXT.\
95
+ # ")
96
+ # task.change(set_example, inputs=[task, lang], outputs=[input, lang])
97
+ # generate.click(greet, inputs=[input, task, lang], outputs=output)
98
+ # block.launch()