Abinaya Mahendiran committed on
Commit
e4461ed
1 Parent(s): fb12737

Updated app

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +43 -28
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Tamil
3
  emoji: 💻
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
1
  ---
2
  title: Tamil
3
  emoji: 💻
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
app.py CHANGED
@@ -14,6 +14,7 @@ with open("config.json") as f:
14
  # Set page layout
15
  st.set_page_config(
16
  page_title="Tamil Language Models",
 
17
  layout="wide",
18
  initial_sidebar_state="expanded"
19
  )
@@ -24,59 +25,73 @@ def load_model(model_name):
24
  with st.spinner('Waiting for the model to load.....'):
25
  model = AutoModelWithLMHead.from_pretrained(model_name)
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
27
- st.success('Model loaded!!')
28
  return model, tokenizer
29
 
30
  # Side bar
31
  img = st.sidebar.image("images/tamil_logo.jpg", width=300)
32
 
33
  # Choose the model based on selection
34
- page = st.sidebar.selectbox("Model", config["models"])
35
- data = st.sidebar.selectbox("Data", config[page])
 
 
 
 
 
36
 
37
  # Main page
38
  st.title("Tamil Language Demos")
39
  st.markdown(
40
- "This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
41
- "and [GPT2 trained on Oscar & Indic Corpus dataset] (https://huggingface.co/abinayam/gpt-2-tamil) "
42
  "to show language generation!"
43
  )
44
 
 
 
 
45
  if page == 'Text Generation' and data == 'Oscar':
46
  st.header('Tamil text generation with GPT2')
47
- st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data')
48
  model, tokenizer = load_model(config[data])
49
  # Set default options
50
- seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
51
- #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5)
52
- max_len = st.number_input('Length of the sentence', 5, 300, 100)
 
 
 
 
 
 
 
53
  gen_bt = st.button('Generate')
54
- if gen_bt:
55
- try:
56
- with st.spinner('Generating...'):
57
- generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
58
- seqs = generator(seed, max_length=max_len)[0]['generated_text']# num_return_sequences=seq_num)
59
- st.write(seqs)
60
- except Exception as e:
61
- st.exception(f'Exception: {e}')
62
  elif page == 'Text Generation' and data == "Oscar + Indic Corpus":
63
  st.header('Tamil text generation with GPT2')
64
- st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data')
65
  model, tokenizer = load_model(config[data])
66
  # Set default options
67
- seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
68
- #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5)
69
- max_len = st.number_input('Length of the sentence', 5, 300, 100)
 
 
 
 
 
 
 
70
  gen_bt = st.button('Generate')
71
- if gen_bt:
 
 
 
 
 
72
  try:
73
  with st.spinner('Generating...'):
74
  generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
75
- seqs = generator(seed, max_length=max_len)[0]['generated_text'] #num_return_sequences=seq_num)
76
  st.write(seqs)
77
  except Exception as e:
78
- st.exception(f'Exception: {e}')
79
- else:
80
- st.title('Tamil News classification with Finetuned GPT2')
81
- st.markdown('In progress')
82
-
14
  # Set page layout
15
  st.set_page_config(
16
  page_title="Tamil Language Models",
17
+ page_icon="✍️",
18
  layout="wide",
19
  initial_sidebar_state="expanded"
20
  )
25
  with st.spinner('Waiting for the model to load.....'):
26
  model = AutoModelWithLMHead.from_pretrained(model_name)
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
28
  return model, tokenizer
29
 
30
  # Side bar
31
  img = st.sidebar.image("images/tamil_logo.jpg", width=300)
32
 
33
  # Choose the model based on selection
34
+ st.sidebar.title("கதை சொல்லி!")
35
+ page = st.sidebar.selectbox(label="Select model",
36
+ options=config["models"],
37
+ help="Select the model to generate the text")
38
+ data = st.sidebar.selectbox(label="Select data",
39
+ options=config[page],
40
+ help="Select the data on which the model is trained")
41
 
42
  # Main page
43
  st.title("Tamil Language Demos")
44
  st.markdown(
45
+ "Built as part of the Flax/Jax Community week, this demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
46
+ "and [GPT2 trained on Oscar & IndicNLP dataset] (https://huggingface.co/abinayam/gpt-2-tamil) "
47
  "to show language generation!"
48
  )
49
 
50
+ # Set default options for examples
51
+ prompts = config["examples"] + ["Custom"]
52
+
53
  if page == 'Text Generation' and data == 'Oscar':
54
  st.header('Tamil text generation with GPT2')
55
+ st.markdown('A simple demo using gpt-2-tamil model trained on Oscar dataset!')
56
  model, tokenizer = load_model(config[data])
57
  # Set default options
58
+ prompt = st.selectbox('Examples', prompts, index=len(prompts) - 1)
59
+ if prompt == "Custom":
60
+ prompt_box = ""
61
+ else:
62
+ prompt_box = prompt
63
+ text = st.text_input(
64
+ 'Add your custom text in Tamil',
65
+ "",
66
+ max_chars=1000)
67
+ max_len = st.slider('Length of the sentence to generate', 25, 300, 100)
68
  gen_bt = st.button('Generate')
 
 
 
 
 
 
 
 
69
  elif page == 'Text Generation' and data == "Oscar + Indic Corpus":
70
  st.header('Tamil text generation with GPT2')
71
+ st.markdown('A simple demo using gpt-2-tamil model trained on Oscar + IndicNLP dataset')
72
  model, tokenizer = load_model(config[data])
73
  # Set default options
74
+ prompt = st.selectbox('Examples', prompts, index=len(prompts) - 1)
75
+ if prompt == "Custom":
76
+ prompt_box = ""
77
+ else:
78
+ prompt_box = prompt
79
+ text = st.text_input(
80
+ 'Add your custom text in Tamil',
81
+ "",
82
+ max_chars=1000)
83
+ max_len = st.slider('Length of the sentence', 5, 300, 100)
84
  gen_bt = st.button('Generate')
85
+ else:
86
+ st.title('Tamil News classification with Finetuned GPT2')
87
+ st.markdown('In progress')
88
+
89
+ # Generate text
90
+ if gen_bt:
91
  try:
92
  with st.spinner('Generating...'):
93
  generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
94
+ seqs = generator(prompt_box, max_length=max_len)[0]['generated_text']
95
  st.write(seqs)
96
  except Exception as e:
97
+ st.exception(f'Exception: {e}')