File size: 14,065 Bytes
8f59407
 
f9b8716
8f59407
 
ee7626a
8f59407
ba2a3ae
 
 
 
 
 
 
 
 
7d6f3d4
 
ba2a3ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f59407
0325d36
ecffc39
ba2a3ae
 
8f59407
 
 
 
 
 
 
 
123980d
38dd92a
ba01e7c
09834f8
ddb7329
ca9e330
0325d36
ddb7329
ca9e330
c629a2b
ddb7329
ca9e330
09834f8
c629a2b
f9b8716
0325d36
f9b8716
ca9e330
ddb7329
0325d36
 
c629a2b
f9b8716
0325d36
 
c629a2b
f9b8716
 
 
c629a2b
09834f8
c629a2b
 
09834f8
c629a2b
 
f9b8716
 
09834f8
 
c629a2b
 
f9b8716
c629a2b
 
f9b8716
 
 
 
 
0325d36
c629a2b
f9b8716
0325d36
 
c629a2b
f9b8716
 
 
 
0325d36
 
c629a2b
f9b8716
0325d36
c629a2b
8f59407
123980d
 
 
8f59407
123980d
8f59407
a26ce70
b42a3ee
 
f612201
a26ce70
0598d8e
a26ce70
26bcd5f
 
aab3062
a26ce70
 
123980d
7d6f3d4
 
ba2a3ae
8f59407
ff299b4
0325d36
ba2a3ae
 
0325d36
 
 
 
 
 
 
 
 
 
5c594a5
 
0325d36
a26ce70
 
 
 
272ce33
198feb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74028a7
 
a26ce70
74028a7
a26ce70
74028a7
f9b32f8
f612201
ed6ef70
272ce33
f612201
a26ce70
123980d
 
 
 
 
 
 
 
 
272ce33
978d584
123980d
0325d36
 
 
 
 
 
 
 
 
 
 
0be38ca
0325d36
 
09834f8
 
 
 
ecffc39
0325d36
ecffc39
0325d36
ecffc39
0325d36
09834f8
bcd8f3a
09834f8
 
 
 
 
ecffc39
0325d36
ecffc39
 
c629a2b
 
09834f8
bcd8f3a
09834f8
 
 
 
ecffc39
0325d36
e3def2b
 
0325d36
c629a2b
ecffc39
bcd8f3a
f9b8716
09834f8
f9b8716
 
ecffc39
0325d36
ecffc39
 
c629a2b
 
f9b8716
bcd8f3a
8f59407
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import gradio as gr
import spaces
from transformers import pipeline
import torch

# Markdown body rendered near the top of the demo page via gr.Markdown(DESCRIPTION).
# The text is runtime content: edit it only to change what visitors read.
DESCRIPTION="""

### a Turkish encoder-decoder language model 

Welcome to our Huggingface space, where you can explore the capabilities of TURNA. 
			
**Key Features of TURNA:**

- **Powerful Architecture:** TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains.
- **Diverse Training Data:** Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains.
- **Broad Applications:** TURNA is fine-tuned for a variety of generation and understanding tasks, including:
    
    - Summarization
	- Paraphrasing
	- News title generation
	- Sentiment classification
	- Text categorization
	- Named entity recognition
	- Part-of-speech tagging
	- Semantic textual similarity
	- Natural language inference

Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details.

### Citation
```bibtex
@misc{uludoğan2024turna,
	title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation}, 
	author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı},
	year={2024},
	eprint={2401.14373},
	archivePrefix={arXiv},
	primaryClass={cs.CL}
}
```

**Note:** First inference might take time as the models are downloaded on-the-go.

*TURNA can generate toxic content or provide erroneous information. Double-check before usage.*

"""


# Example rows for the gr.Examples widgets below. Each constant is a list of
# rows, and every row holds a single value: the text for the tab's textbox.
# Example for the "Sentiment Analysis" tab.
sentiment_example = [["Bu üründen çok memnun kaldım."]]
# Longer passage shared by the "Paraphrase" and "Summarization" tabs.
long_text = [["Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır."]]
# Short sentence shared by the "NER" and "POS" tabs.
ner_example = [["Benim adım Turna."]]
# NOTE(review): not referenced anywhere in the visible part of this file.
t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
# Example for the "NLI" tab: two sentences in one string.
nli_example = [["Bunu çok beğendim. Bunu çok sevdim."]]
# Example for the "Text Categorization" tab (note the leading space in the text).
text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]



@spaces.GPU
def nli(input, model_choice="turna_nli_nli_tr"):
    """Run a TURNA inference / semantic-similarity checkpoint on ``input``.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_nli_nli_tr"`` selects the NLI checkpoint; any
            other value selects the STS-B semantic-similarity checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the NLI name is checked explicitly; everything else falls through
    # to the semantic-similarity model.
    repo_id = (
        "boun-tabi-LMG/turna_nli_nli_tr"
        if model_choice == "turna_nli_nli_tr"
        else "boun-tabi-LMG/turna_semantic_similarity_stsb_tr"
    )
    # The pipeline is built on every call and placed on device 0.
    generator = pipeline(model=repo_id, device=0)
    outputs = generator(input)
    return outputs[0]["generated_text"]


@spaces.GPU
def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
    """Classify the sentiment of ``input`` with a fine-tuned TURNA checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_classification_17bintweet_sentiment"`` selects
            the tweet-sentiment checkpoint; any other value selects the
            product-reviews checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the tweet model name is checked explicitly; everything else falls
    # through to the product-reviews model.
    repo_id = (
        "boun-tabi-LMG/turna_classification_17bintweet_sentiment"
        if model_choice == "turna_classification_17bintweet_sentiment"
        else "boun-tabi-LMG/turna_classification_tr_product_reviews"
    )
    # The pipeline is built on every call and placed on device 0.
    classifier = pipeline(model=repo_id, device=0)
    outputs = classifier(input)
    return outputs[0]["generated_text"]


@spaces.GPU
def pos(input, model_choice="turna_pos_imst"):
    """Tag ``input`` with a fine-tuned TURNA part-of-speech checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_pos_imst"`` selects the IMST checkpoint; any
            other value selects the BOUN checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the IMST name is checked explicitly; everything else falls through
    # to the BOUN model.
    repo_id = (
        "boun-tabi-LMG/turna_pos_imst"
        if model_choice == "turna_pos_imst"
        else "boun-tabi-LMG/turna_pos_boun"
    )
    # The pipeline is built on every call and placed on device 0.
    tagger = pipeline(model=repo_id, device=0)
    outputs = tagger(input)
    return outputs[0]["generated_text"]

@spaces.GPU
def ner(input, model_choice="turna_ner_wikiann"):
    """Extract named entities from ``input`` with a fine-tuned TURNA checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_ner_wikiann"`` selects the WikiANN checkpoint;
            any other value selects the Milliyet checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the WikiANN name is checked explicitly; everything else falls
    # through to the Milliyet model.
    repo_id = (
        "boun-tabi-LMG/turna_ner_wikiann"
        if model_choice == "turna_ner_wikiann"
        else "boun-tabi-LMG/turna_ner_milliyet"
    )
    # The pipeline is built on every call and placed on device 0.
    recognizer = pipeline(model=repo_id, device=0)
    outputs = recognizer(input)
    return outputs[0]["generated_text"]


@spaces.GPU
def paraphrase(input, model_choice="turna_paraphrasing_tatoeba"):
    """Paraphrase ``input`` with a fine-tuned TURNA checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_paraphrasing_tatoeba"`` selects the Tatoeba
            checkpoint; any other value selects the OpenSubtitles checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the Tatoeba name is checked explicitly; everything else falls
    # through to the OpenSubtitles model.
    repo_id = (
        "boun-tabi-LMG/turna_paraphrasing_tatoeba"
        if model_choice == "turna_paraphrasing_tatoeba"
        else "boun-tabi-LMG/turna_paraphrasing_opensubtitles"
    )
    # The pipeline is built on every call and placed on device 0.
    paraphraser = pipeline(model=repo_id, device=0)
    outputs = paraphraser(input)
    return outputs[0]["generated_text"]
        
@spaces.GPU
def summarize(input, model_choice="turna_summarization_tr_news"):
    """Summarize ``input`` with a fine-tuned TURNA checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_summarization_tr_news"`` selects the TR-News
            checkpoint; any other value selects the MLSUM checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # Only the TR-News name is checked explicitly; everything else falls
    # through to the MLSUM model.
    repo_id = (
        "boun-tabi-LMG/turna_summarization_tr_news"
        if model_choice == "turna_summarization_tr_news"
        else "boun-tabi-LMG/turna_summarization_mlsum"
    )
    # The pipeline is built on every call and placed on device 0.
    summarizer = pipeline(model=repo_id, device=0)
    outputs = summarizer(input)
    return outputs[0]["generated_text"]

@spaces.GPU
def categorize(input):
    """Categorize ``input`` with the TURNA TTC4900 classification checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # The pipeline is built on every call and placed on device 0.
    classifier = pipeline(
        model="boun-tabi-LMG/turna_classification_ttc4900",
        device=0,
    )
    first_result = classifier(input)[0]
    return first_result["generated_text"]

@spaces.GPU
def turna(input, max_new_tokens=128, length_penalty=1.0,
          top_k=10, top_p=0.95, temp=1.0, num_beams=3,
          do_sample=True, no_repeat_ngram_size=3, repetition_penalty=3.1):
    """Generate a continuation of ``input`` with the pre-trained TURNA model.

    Every generation option has a default equal to the initial value of the
    matching control in the demo UI. This lets the function be called with
    only the prompt, as happens when an example row supplies just the text,
    and matches the other handlers in this file, which all have defaults.

    Args:
        input: Prompt text; it is wrapped as ``"[S2S] {input}<EOS>"`` before
            being sent to the model.
        max_new_tokens: Forwarded as ``max_new_tokens`` (coerced to ``int``).
        length_penalty: Forwarded as ``length_penalty``.
        top_k: Forwarded as ``top_k`` (coerced to ``int``).
        top_p: Forwarded as ``top_p``.
        temp: Forwarded as ``temperature``.
        num_beams: Forwarded as ``num_beams`` (coerced to ``int``).
        do_sample: Forwarded as ``do_sample``.
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size`` (coerced
            to ``int``).
        repetition_penalty: Forwarded as ``repetition_penalty``.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # The pipeline is built on every call and placed on device 0.
    generator = pipeline(model="boun-tabi-LMG/TURNA", device=0)
    prompt = f"[S2S] {input}<EOS>"

    # UI sliders may deliver floats (e.g. 3.0); the count-like options are
    # truncated to integers before being forwarded.
    outputs = generator(
        prompt,
        max_new_tokens=int(max_new_tokens),
        length_penalty=length_penalty,
        top_k=int(top_k),
        top_p=top_p,
        temperature=temp,
        num_beams=int(num_beams),
        do_sample=do_sample,
        no_repeat_ngram_size=int(no_repeat_ngram_size),
        repetition_penalty=repetition_penalty,
    )
    return outputs[0]["generated_text"]


# Demo UI: a header (title, logo, description) followed by one tab per task.
# Each tab pairs its inputs with an output textbox, wires the submit button to
# the matching handler function, and offers one clickable example.
with gr.Blocks(theme="abidlabs/Lime") as demo:
    gr.Markdown("# TURNA")
    gr.Image("images/turna-logo.png", width=100)

    gr.Markdown(DESCRIPTION)

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sentiment_choice = gr.Radio(
                        choices=["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"],
                        label="Model",
                        value="turna_classification_17bintweet_sentiment",
                    )
                    sentiment_input = gr.Textbox(label="Sentiment Analysis Input")

                    sentiment_submit = gr.Button()
                sentiment_output = gr.Textbox(label="Sentiment Analysis Output")
                sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output)
            sentiment_examples = gr.Examples(
                examples=sentiment_example,
                inputs=[sentiment_input, sentiment_choice],
                outputs=sentiment_output,
                fn=sentiment_analysis,
            )

    with gr.Tab("TURNA 🐦"):
        gr.Markdown("Pre-trained TURNA. Enter text to start generating.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Accordion("Advanced Generation Parameters"):
                        # At least one new token is required to produce any
                        # output, so the slider starts at 1 rather than 0.
                        max_new_tokens = gr.Slider(
                            label="Maximum length",
                            minimum=1,
                            maximum=512,
                            step=1,
                            value=128,
                        )
                        length_penalty = gr.Slider(label="Length penalty", value=1.0)
                        top_k = gr.Slider(label="Top-k", value=10)
                        # Top-p is a probability mass, so it is limited to
                        # [0, 1] with a fine step instead of the default
                        # slider range.
                        top_p = gr.Slider(
                            label="Top-p",
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=0.95,
                        )
                        temp = gr.Slider(label="Temperature", value=1.0, minimum=0.1, maximum=100.0)
                        no_repeat_ngram_size = gr.Slider(label="No Repeat N-Gram Size", minimum=0, value=3)
                        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=0.0, value=3.1, step=0.1)
                        # Beam count must be a whole number; an explicit step
                        # keeps the slider from producing fractional values.
                        num_beams = gr.Slider(
                            label="Number of beams",
                            minimum=1,
                            maximum=10,
                            step=1,
                            value=3,
                        )
                        do_sample = gr.Radio(choices=[True, False], value=True, label="Sampling")
                with gr.Column():
                    text_gen_input = gr.Textbox(label="Text Generation Input")

                    text_gen_submit = gr.Button()
                text_gen_output = gr.Textbox(label="Text Generation Output")
            text_gen_submit.click(
                turna,
                inputs=[text_gen_input, max_new_tokens, length_penalty,
                        top_k, top_p, temp, num_beams,
                        do_sample, no_repeat_ngram_size, repetition_penalty],
                outputs=text_gen_output,
            )
            text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]]
            # The example row only provides the prompt; the generation
            # controls keep whatever values they currently show.
            text_gen_examples = gr.Examples(
                examples=text_gen_example,
                inputs=[text_gen_input, max_new_tokens, length_penalty,
                        top_k, top_p, temp, num_beams,
                        do_sample, no_repeat_ngram_size, repetition_penalty],
                outputs=text_gen_output,
                fn=turna,
            )

    with gr.Tab("Text Categorization"):
        gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(label="Text Categorization Input")

                    text_submit = gr.Button()
                text_output = gr.Textbox(label="Text Categorization Output")
                text_submit.click(categorize, inputs=[text_input], outputs=text_output)
            text_examples = gr.Examples(examples=text_category_example, inputs=[text_input], outputs=text_output, fn=categorize)

    with gr.Tab("NLI"):
        gr.Markdown("TURNA fine-tuned on natural language inference. Enter text to infer entailment and pick the model. You can also check for semantic similarity entailment.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    nli_choice = gr.Radio(
                        choices=["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"],
                        label="Model",
                        value="turna_nli_nli_tr",
                    )
                    nli_input = gr.Textbox(label="NLI Input")

                    nli_submit = gr.Button()
                nli_output = gr.Textbox(label="NLI Output")
                nli_submit.click(nli, inputs=[nli_input, nli_choice], outputs=nli_output)
            nli_examples = gr.Examples(examples=nli_example, inputs=[nli_input, nli_choice], outputs=nli_output, fn=nli)

    with gr.Tab("POS"):
        gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    pos_choice = gr.Radio(
                        choices=["turna_pos_imst", "turna_pos_boun"],
                        label="Model",
                        value="turna_pos_imst",
                    )
                    pos_input = gr.Textbox(label="POS Input")

                    pos_submit = gr.Button()
                pos_output = gr.Textbox(label="POS Output")
                pos_submit.click(pos, inputs=[pos_input, pos_choice], outputs=pos_output)
            # The POS tab reuses the NER example sentence.
            pos_examples = gr.Examples(examples=ner_example, inputs=[pos_input, pos_choice], outputs=pos_output, fn=pos)

    with gr.Tab("NER"):
        gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    ner_choice = gr.Radio(
                        choices=["turna_ner_wikiann", "turna_ner_milliyet"],
                        label="Model",
                        value="turna_ner_wikiann",
                    )
                    ner_input = gr.Textbox(label="NER Input")
                    ner_submit = gr.Button()
                ner_output = gr.Textbox(label="NER Output")

                ner_submit.click(ner, inputs=[ner_input, ner_choice], outputs=ner_output)
            ner_examples = gr.Examples(examples=ner_example, inputs=[ner_input, ner_choice], outputs=ner_output, fn=ner)

    with gr.Tab("Paraphrase"):
        gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    paraphrasing_choice = gr.Radio(
                        choices=["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"],
                        label="Model",
                        value="turna_paraphrasing_tatoeba",
                    )
                    paraphrasing_input = gr.Textbox(label="Paraphrasing Input")
                    paraphrasing_submit = gr.Button()
                paraphrasing_output = gr.Text(label="Paraphrasing Output")

            paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output)
            paraphrase_examples = gr.Examples(examples=long_text, inputs=[paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output, fn=paraphrase)

    with gr.Tab("Summarization"):
        gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sum_choice = gr.Radio(
                        choices=["turna_summarization_mlsum", "turna_summarization_tr_news"],
                        label="Model",
                        value="turna_summarization_mlsum",
                    )
                    sum_input = gr.Textbox(label="Summarization Input")
                    sum_submit = gr.Button()
                sum_output = gr.Textbox(label="Summarization Output")

                sum_submit.click(summarize, inputs=[sum_input, sum_choice], outputs=sum_output)
            sum_examples = gr.Examples(examples=long_text, inputs=[sum_input, sum_choice], outputs=sum_output, fn=summarize)
demo.launch()