yirmibesogluz commited on
Commit
ca81517
·
1 Parent(s): cfa4f27

Create home page

Browse files
Files changed (3) hide show
  1. app.py +5 -1
  2. apps/home.py +10 -79
  3. apps/turna.py +106 -0
app.py CHANGED
@@ -10,6 +10,8 @@ import apps.sentiment
10
  import apps.categorization
11
  import apps.ner
12
  import apps.pos_tagging
 
 
13
 
14
  st.set_page_config(
15
  page_title="Turna",
@@ -25,7 +27,9 @@ PAGES = {
25
  "Sentiment Classification": apps.sentiment,
26
  "Text Categorization": apps.categorization,
27
  "Named Entity Recognition": apps.ner,
28
- "Part-of-Speech Tagging": apps.pos_tagging
 
 
29
  }
30
 
31
  st.sidebar.title("Navigation")
 
10
  import apps.categorization
11
  import apps.ner
12
  import apps.pos_tagging
13
+ import apps.sts
14
+ import apps.turna
15
 
16
  st.set_page_config(
17
  page_title="Turna",
 
27
  "Sentiment Classification": apps.sentiment,
28
  "Text Categorization": apps.categorization,
29
  "Named Entity Recognition": apps.ner,
30
+ "Part-of-Speech Tagging": apps.pos_tagging,
31
+ "Semantic Textual Similarity": apps.sts,
32
+ "Text Generation": apps.turna,
33
  }
34
 
35
  st.sidebar.title("Navigation")
apps/home.py CHANGED
@@ -21,86 +21,17 @@ def write():
21
 
22
  st.markdown(
23
  """
24
-
25
- <h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>
26
-
27
- <p style="text-align:right;"><p>
28
- <p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
29
  <p style="text-align:right;"><p>
 
 
 
 
 
 
 
30
  """,
31
  unsafe_allow_html=True,
32
  )
33
-
34
- #st.title('Turkish Language Generation')
35
- #st.write('...with Turna')
36
-
37
- # Sidebar
38
-
39
- # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
40
- st.sidebar.subheader("Configurable parameters")
41
-
42
- max_new_tokens = st.sidebar.number_input(
43
- "Maximum length",
44
- min_value=0,
45
- max_value=512,
46
- value=128,
47
- help="The maximum length of the sequence to be generated.",
48
- )
49
- length_penalty = st.sidebar.number_input(
50
- "Length penalty",
51
- value=1.0,
52
- help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
53
- )
54
- do_sample = st.sidebar.selectbox(
55
- "Sampling?",
56
- (True, False),
57
- help="Whether or not to use sampling; use greedy decoding otherwise.",
58
- )
59
- num_beams = st.sidebar.number_input(
60
- "Number of beams",
61
- min_value=1,
62
- max_value=10,
63
- value=3,
64
- help="The number of beams to use for beam search.",
65
- )
66
- repetition_penalty = st.sidebar.number_input(
67
- "Repetition Penalty",
68
- min_value=0.0,
69
- value=3.0,
70
- step=0.1,
71
- help="The parameter for repetition penalty. 1.0 means no penalty",
72
- )
73
- no_repeat_ngram_size = st.sidebar.number_input(
74
- "No Repeat N-Gram Size",
75
- min_value=0,
76
- value=3,
77
- help="If set to int > 0, all ngrams of that size can only occur once.",
78
- )
79
- temp = st.sidebar.slider(
80
- "Temperature",
81
- value=1.0,
82
- min_value=0.1,
83
- max_value=100.0,
84
- help="The value used to module the next token probabilities.",
85
- )
86
- top_k = st.sidebar.number_input(
87
- "Top k",
88
- value=10,
89
- help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
90
- )
91
- top_p = st.sidebar.number_input(
92
- "Top p",
93
- value=0.95,
94
- help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
95
- )
96
-
97
- input_text = st.text_area(label='Enter a text: ', height=100,
98
- value="Türkiye'nin başkenti neresidir?")
99
- url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
100
- params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens,
101
- "do_sample":do_sample, "num_beams":num_beams, "repetition_penalty":repetition_penalty,
102
- "top_p":top_p, "top_k":top_k, "temperature":temp, "early_stopping": True, "max_length": 256}
103
- if st.button("Generate"):
104
- with st.spinner('Generating...'):
105
- output = query(f'[S2S] {input_text}<EOS>', url, params)
106
- st.success(output)
 
21
 
22
  st.markdown(
23
  """
24
+ <h3 style="text-align:left;">TURNA is a Turkish encoder-decoder language model.</h3>
 
 
 
 
25
  <p style="text-align:right;"><p>
26
+ <p style="text-align:left;">In this Huggingface space, you can test the TURNA language model. </p>
27
+ <p style="text-align:left;">The model contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains. </p>
28
+ <p style="text-align:left;">TURNA was fine-tuned to carry out Turkish summarization, paraphrasing, news title generation, sentiment classification, text categorization, named entity recognition, part-of-speech tagging, semantic textual similarity and natural language inference tasks. </p>
29
+ <p style="text-align:left;">Go to the <strong>Navigation</strong> bar to access our applications. </p>
30
+ <p style="text-align:left;">Refer to our <a href="https://arxiv.org/abs/2401.14373">paper</a> for more details... </p>
31
+ <p style="text-align:left;"></p>
32
+ <p style="text-align:right;"><em>TURNA can generate toxic content or provide erroneous information. Double-check before usage. </em></p>
33
  """,
34
  unsafe_allow_html=True,
35
  )
36
+
37
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
apps/turna.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import time
4
+ from transformers import pipeline
5
+ import os
6
+ from .utils import query
7
+
8
def write():
    """Render the TURNA text-generation page.

    Draws the page header and logo, exposes the generation hyperparameters
    in the Streamlit sidebar, and — when the user presses "Generate" —
    sends the prompt to the hosted TURNA model through the Hugging Face
    Inference API and displays the response.

    Side effects only (Streamlit UI + one HTTP request via ``query``);
    returns nothing.
    """
    st.markdown(
        """
        <h1 style="text-align:left;">TURNA</h1>
        """,
        unsafe_allow_html=True,
    )

    st.write("#")

    col = st.columns(2)

    col[0].image("images/turna-logo.png", width=100)

    st.markdown(
        """
        <h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>

        <p style="text-align:right;"></p>
        <p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
        <p style="text-align:right;"></p>
        """,
        unsafe_allow_html=True,
    )

    # Sidebar: generation hyperparameters, forwarded verbatim to the
    # Inference API. Layout adapted from
    # https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
    st.sidebar.subheader("Configurable parameters")

    max_new_tokens = st.sidebar.number_input(
        "Maximum length",
        min_value=0,
        max_value=512,
        value=128,
        help="The maximum length of the sequence to be generated.",
    )
    length_penalty = st.sidebar.number_input(
        "Length penalty",
        value=1.0,
        help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
    )
    do_sample = st.sidebar.selectbox(
        "Sampling?",
        (True, False),
        help="Whether or not to use sampling; use greedy decoding otherwise.",
    )
    num_beams = st.sidebar.number_input(
        "Number of beams",
        min_value=1,
        max_value=10,
        value=3,
        help="The number of beams to use for beam search.",
    )
    repetition_penalty = st.sidebar.number_input(
        "Repetition Penalty",
        min_value=0.0,
        value=3.0,
        step=0.1,
        help="The parameter for repetition penalty. 1.0 means no penalty",
    )
    no_repeat_ngram_size = st.sidebar.number_input(
        "No Repeat N-Gram Size",
        min_value=0,
        value=3,
        help="If set to int > 0, all ngrams of that size can only occur once.",
    )
    temp = st.sidebar.slider(
        "Temperature",
        value=1.0,
        min_value=0.1,
        max_value=100.0,
        help="The value used to modulate the next token probabilities.",
    )
    top_k = st.sidebar.number_input(
        "Top k",
        value=10,
        help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
    )
    top_p = st.sidebar.number_input(
        "Top p",
        value=0.95,
        help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
    )

    input_text = st.text_area(label='Enter a text: ', height=100,
                              value="Türkiye'nin başkenti neresidir?")
    url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
    params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens,
              "do_sample": do_sample, "num_beams": num_beams, "repetition_penalty": repetition_penalty,
              "top_p": top_p, "top_k": top_k, "temperature": temp, "early_stopping": True, "max_length": 256}
    if st.button("Generate"):
        with st.spinner('Generating...'):
            # TURNA expects a task prefix ([S2S]) and an explicit <EOS>
            # terminator around the raw prompt.
            output = query(f'[S2S] {input_text}<EOS>', url, params)
            st.success(output)