Spaces:
Runtime error
Runtime error
Commit
·
ca81517
1
Parent(s):
cfa4f27
Create home page
Browse files
- app.py +5 -1
- apps/home.py +10 -79
- apps/turna.py +106 -0
app.py
CHANGED
@@ -10,6 +10,8 @@ import apps.sentiment
|
|
10 |
import apps.categorization
|
11 |
import apps.ner
|
12 |
import apps.pos_tagging
|
|
|
|
|
13 |
|
14 |
st.set_page_config(
|
15 |
page_title="Turna",
|
@@ -25,7 +27,9 @@ PAGES = {
|
|
25 |
"Sentiment Classification": apps.sentiment,
|
26 |
"Text Categorization": apps.categorization,
|
27 |
"Named Entity Recognition": apps.ner,
|
28 |
-
"Part-of-Speech Tagging": apps.pos_tagging
|
|
|
|
|
29 |
}
|
30 |
|
31 |
st.sidebar.title("Navigation")
|
|
|
10 |
import apps.categorization
|
11 |
import apps.ner
|
12 |
import apps.pos_tagging
|
13 |
+
import apps.sts
|
14 |
+
import apps.turna
|
15 |
|
16 |
st.set_page_config(
|
17 |
page_title="Turna",
|
|
|
27 |
"Sentiment Classification": apps.sentiment,
|
28 |
"Text Categorization": apps.categorization,
|
29 |
"Named Entity Recognition": apps.ner,
|
30 |
+
"Part-of-Speech Tagging": apps.pos_tagging,
|
31 |
+
"Semantic Textual Similarity": apps.sts,
|
32 |
+
"Text Generation": apps.turna,
|
33 |
}
|
34 |
|
35 |
st.sidebar.title("Navigation")
|
apps/home.py
CHANGED
@@ -21,86 +21,17 @@ def write():
|
|
21 |
|
22 |
st.markdown(
|
23 |
"""
|
24 |
-
|
25 |
-
<h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>
|
26 |
-
|
27 |
-
<p style="text-align:right;"><p>
|
28 |
-
<p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
|
29 |
<p style="text-align:right;"><p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
""",
|
31 |
unsafe_allow_html=True,
|
32 |
)
|
33 |
-
|
34 |
-
|
35 |
-
#st.write('...with Turna')
|
36 |
-
|
37 |
-
# Sidebar
|
38 |
-
|
39 |
-
# Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
|
40 |
-
st.sidebar.subheader("Configurable parameters")
|
41 |
-
|
42 |
-
max_new_tokens = st.sidebar.number_input(
|
43 |
-
"Maximum length",
|
44 |
-
min_value=0,
|
45 |
-
max_value=512,
|
46 |
-
value=128,
|
47 |
-
help="The maximum length of the sequence to be generated.",
|
48 |
-
)
|
49 |
-
length_penalty = st.sidebar.number_input(
|
50 |
-
"Length penalty",
|
51 |
-
value=1.0,
|
52 |
-
help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
|
53 |
-
)
|
54 |
-
do_sample = st.sidebar.selectbox(
|
55 |
-
"Sampling?",
|
56 |
-
(True, False),
|
57 |
-
help="Whether or not to use sampling; use greedy decoding otherwise.",
|
58 |
-
)
|
59 |
-
num_beams = st.sidebar.number_input(
|
60 |
-
"Number of beams",
|
61 |
-
min_value=1,
|
62 |
-
max_value=10,
|
63 |
-
value=3,
|
64 |
-
help="The number of beams to use for beam search.",
|
65 |
-
)
|
66 |
-
repetition_penalty = st.sidebar.number_input(
|
67 |
-
"Repetition Penalty",
|
68 |
-
min_value=0.0,
|
69 |
-
value=3.0,
|
70 |
-
step=0.1,
|
71 |
-
help="The parameter for repetition penalty. 1.0 means no penalty",
|
72 |
-
)
|
73 |
-
no_repeat_ngram_size = st.sidebar.number_input(
|
74 |
-
"No Repeat N-Gram Size",
|
75 |
-
min_value=0,
|
76 |
-
value=3,
|
77 |
-
help="If set to int > 0, all ngrams of that size can only occur once.",
|
78 |
-
)
|
79 |
-
temp = st.sidebar.slider(
|
80 |
-
"Temperature",
|
81 |
-
value=1.0,
|
82 |
-
min_value=0.1,
|
83 |
-
max_value=100.0,
|
84 |
-
help="The value used to module the next token probabilities.",
|
85 |
-
)
|
86 |
-
top_k = st.sidebar.number_input(
|
87 |
-
"Top k",
|
88 |
-
value=10,
|
89 |
-
help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
|
90 |
-
)
|
91 |
-
top_p = st.sidebar.number_input(
|
92 |
-
"Top p",
|
93 |
-
value=0.95,
|
94 |
-
help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
|
95 |
-
)
|
96 |
-
|
97 |
-
input_text = st.text_area(label='Enter a text: ', height=100,
|
98 |
-
value="Türkiye'nin başkenti neresidir?")
|
99 |
-
url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
|
100 |
-
params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens,
|
101 |
-
"do_sample":do_sample, "num_beams":num_beams, "repetition_penalty":repetition_penalty,
|
102 |
-
"top_p":top_p, "top_k":top_k, "temperature":temp, "early_stopping": True, "max_length": 256}
|
103 |
-
if st.button("Generate"):
|
104 |
-
with st.spinner('Generating...'):
|
105 |
-
output = query(f'[S2S] {input_text}<EOS>', url, params)
|
106 |
-
st.success(output)
|
|
|
21 |
|
22 |
st.markdown(
|
23 |
"""
|
24 |
+
<h3 style="text-align:left;">... is a Turkish encoder-decoder language model.</h3>
|
|
|
|
|
|
|
|
|
25 |
<p style="text-align:right;"><p>
|
26 |
+
<p style="text-align:left;">In this Huggingface space, you can test the TURNA language model. </p>
|
27 |
+
<p style="text-align:left;">The model contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains. </p>
|
28 |
+
<p style="text-align:left;">TURNA was fine-tuned to carry out Turkish summarization, paraphrasing, news title generation, sentiment classification, text categorization, named entity recognition, part-of-speech tagging, semantic textual similarity and natural language inference tasks. </p>
|
29 |
+
<p style="text-align:left;">Go to the <strong>Navigation</strong> bar to access our applications. </p>
|
30 |
+
<p style="text-align:left;">Refer to our <a href="https://arxiv.org/abs/2401.14373">paper</a> for more details... </p>
|
31 |
+
<p style="text-align:left;"><p>
|
32 |
+
<p style="text-align:right;"><em>TURNA can generate toxic content or provide erroneous information. Double-check before usage. </em><p>
|
33 |
""",
|
34 |
unsafe_allow_html=True,
|
35 |
)
|
36 |
+
|
37 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
apps/turna.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import streamlit as st
|
3 |
+
import time
|
4 |
+
from transformers import pipeline
|
5 |
+
import os
|
6 |
+
from .utils import query
|
7 |
+
|
8 |
+
def write():
    """Render the TURNA text-generation page.

    Draws the page title, logo and a short description, exposes the
    generation parameters as sidebar widgets, and, once the "Generate"
    button is pressed, passes the prompt and the selected parameters to
    ``query`` and displays whatever it returns.
    """
    st.markdown(
        """
        <h1 style="text-align:left;">TURNA</h1>
        """,
        unsafe_allow_html=True,
    )

    st.write("#")

    # Two columns are created but only the left one is used, so the logo is
    # laid out in the left half of the page.
    col = st.columns(2)

    col[0].image("images/turna-logo.png", width=100)

    st.markdown(
        """

        <h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>

        <p style="text-align:right;"><p>
        <p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
        <p style="text-align:right;"><p>
        """,
        unsafe_allow_html=True,
    )

    # Sidebar
    # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
    st.sidebar.subheader("Configurable parameters")

    max_new_tokens = st.sidebar.number_input(
        "Maximum length",
        # At least one new token must be requested; 0 would ask the model
        # for an empty generation.
        min_value=1,
        max_value=512,
        value=128,
        help="The maximum length of the sequence to be generated.",
    )
    length_penalty = st.sidebar.number_input(
        "Length penalty",
        value=1.0,
        help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
    )
    do_sample = st.sidebar.selectbox(
        "Sampling?",
        (True, False),
        help="Whether or not to use sampling; use greedy decoding otherwise.",
    )
    num_beams = st.sidebar.number_input(
        "Number of beams",
        min_value=1,
        max_value=10,
        value=3,
        help="The number of beams to use for beam search.",
    )
    repetition_penalty = st.sidebar.number_input(
        "Repetition Penalty",
        min_value=0.0,
        value=3.0,
        step=0.1,
        help="The parameter for repetition penalty. 1.0 means no penalty",
    )
    no_repeat_ngram_size = st.sidebar.number_input(
        "No Repeat N-Gram Size",
        min_value=0,
        value=3,
        help="If set to int > 0, all ngrams of that size can only occur once.",
    )
    temp = st.sidebar.slider(
        "Temperature",
        value=1.0,
        min_value=0.1,
        max_value=100.0,
        help="The value used to modulate the next token probabilities.",
    )
    top_k = st.sidebar.number_input(
        "Top k",
        value=10,
        help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
    )
    top_p = st.sidebar.number_input(
        "Top p",
        value=0.95,
        help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
    )

    input_text = st.text_area(
        label='Enter a text: ',
        height=100,
        value="Türkiye'nin başkenti neresidir?",
    )
    url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"

    # Only `max_new_tokens` limits the output length. A fixed `max_length`
    # is deliberately not sent alongside it: the two are competing limits,
    # and the sidebar value (up to 512) is the one the user controls.
    params = {
        "length_penalty": length_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "num_beams": num_beams,
        "repetition_penalty": repetition_penalty,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temp,
        "early_stopping": True,
    }

    if st.button("Generate"):
        # Do not spend an API call on a blank prompt.
        if not input_text.strip():
            st.warning("Please enter a text first.")
            return
        with st.spinner('Generating...'):
            # The prompt is wrapped in the "[S2S]" prefix and "<EOS>" suffix
            # before being handed to the project's `query` helper.
            output = query(f'[S2S] {input_text}<EOS>', url, params)
        st.success(output)
|