Spaces:
Running
on
Zero
Running
on
Zero
File size: 14,065 Bytes
8f59407 f9b8716 8f59407 ee7626a 8f59407 ba2a3ae 7d6f3d4 ba2a3ae 8f59407 0325d36 ecffc39 ba2a3ae 8f59407 123980d 38dd92a ba01e7c 09834f8 ddb7329 ca9e330 0325d36 ddb7329 ca9e330 c629a2b ddb7329 ca9e330 09834f8 c629a2b f9b8716 0325d36 f9b8716 ca9e330 ddb7329 0325d36 c629a2b f9b8716 0325d36 c629a2b f9b8716 c629a2b 09834f8 c629a2b 09834f8 c629a2b f9b8716 09834f8 c629a2b f9b8716 c629a2b f9b8716 0325d36 c629a2b f9b8716 0325d36 c629a2b f9b8716 0325d36 c629a2b f9b8716 0325d36 c629a2b 8f59407 123980d 8f59407 123980d 8f59407 a26ce70 b42a3ee f612201 a26ce70 0598d8e a26ce70 26bcd5f aab3062 a26ce70 123980d 7d6f3d4 ba2a3ae 8f59407 ff299b4 0325d36 ba2a3ae 0325d36 5c594a5 0325d36 a26ce70 272ce33 198feb9 74028a7 a26ce70 74028a7 a26ce70 74028a7 f9b32f8 f612201 ed6ef70 272ce33 f612201 a26ce70 123980d 272ce33 978d584 123980d 0325d36 0be38ca 0325d36 09834f8 ecffc39 0325d36 ecffc39 0325d36 ecffc39 0325d36 09834f8 bcd8f3a 09834f8 ecffc39 0325d36 ecffc39 c629a2b 09834f8 bcd8f3a 09834f8 ecffc39 0325d36 e3def2b 0325d36 c629a2b ecffc39 bcd8f3a f9b8716 09834f8 f9b8716 ecffc39 0325d36 ecffc39 c629a2b f9b8716 bcd8f3a 8f59407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
import gradio as gr
import spaces
from transformers import pipeline
import torch
# Markdown shown at the top of the demo page (rendered through gr.Markdown):
# model overview, the list of fine-tuned tasks, citation and usage notes.
DESCRIPTION="""
### a Turkish encoder-decoder language model
Welcome to our Huggingface space, where you can explore the capabilities of TURNA.
**Key Features of TURNA:**
- **Powerful Architecture:** TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains.
- **Diverse Training Data:** Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains.
- **Broad Applications:** TURNA is fine-tuned for a variety of generation and understanding tasks, including:
- Summarization
- Paraphrasing
- News title generation
- Sentiment classification
- Text categorization
- Named entity recognition
- Part-of-speech tagging
- Semantic textual similarity
- Natural language inference
Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details.
### Citation
```bibtex
@misc{uludoğan2024turna,
title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation},
author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı},
year={2024},
eprint={2401.14373},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
**Note:** First inference might take time as the models are downloaded on-the-go.
*TURNA can generate toxic content or provide erroneous information. Double-check before usage.*
"""
# Example rows for the gr.Examples widgets below. Each is a list of rows, and
# each row holds a single value: the text for the tab's input textbox.
# Example for the "Sentiment Analysis" tab.
sentiment_example = [["Bu üründen çok memnun kaldım."]]
# Longer passage shared by the "Paraphrase" and "Summarization" tabs.
long_text = [["Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır."]]
# Shared by the "POS" and "NER" tabs.
ner_example = [["Benim adım Turna."]]
# NOTE(review): not referenced by any tab in the visible part of this file.
t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
# Example for the "NLI" tab: two sentences in a single input string.
nli_example = [["Bunu çok beğendim. Bunu çok sevdim."]]
# Example for the "Text Categorization" tab.
text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]
@spaces.GPU
def nli(input, model_choice="turna_nli_nli_tr"):
    """Run the NLI or the semantic-similarity checkpoint on ``input``.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_nli_nli_tr"`` selects the NLI checkpoint; any
            other value selects the STS-B semantic-similarity checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_nli_nli_tr"
        if model_choice == "turna_nli_nli_tr"
        else "boun-tabi-LMG/turna_semantic_similarity_stsb_tr"
    )
    # The pipeline is built on every call and placed on device index 0.
    generator = pipeline(model=checkpoint, device=0)
    outputs = generator(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
    """Classify the sentiment of ``input`` with the selected checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_classification_17bintweet_sentiment"`` selects
            the tweet checkpoint; any other value selects the product-reviews
            checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_classification_17bintweet_sentiment"
        if model_choice == "turna_classification_17bintweet_sentiment"
        else "boun-tabi-LMG/turna_classification_tr_product_reviews"
    )
    # The pipeline is built on every call and placed on device index 0.
    classifier = pipeline(model=checkpoint, device=0)
    outputs = classifier(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def pos(input, model_choice="turna_pos_imst"):
    """Tag parts of speech in ``input`` with the selected checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_pos_imst"`` selects the IMST checkpoint; any
            other value selects the BOUN checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_pos_imst"
        if model_choice == "turna_pos_imst"
        else "boun-tabi-LMG/turna_pos_boun"
    )
    # The pipeline is built on every call and placed on device index 0.
    tagger = pipeline(model=checkpoint, device=0)
    outputs = tagger(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def ner(input, model_choice="turna_ner_wikiann"):
    """Extract named entities from ``input`` with the selected checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_ner_wikiann"`` selects the WikiANN checkpoint;
            any other value selects the Milliyet checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_ner_wikiann"
        if model_choice == "turna_ner_wikiann"
        else "boun-tabi-LMG/turna_ner_milliyet"
    )
    # The pipeline is built on every call and placed on device index 0.
    recognizer = pipeline(model=checkpoint, device=0)
    outputs = recognizer(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def paraphrase(input, model_choice="turna_paraphrasing_tatoeba"):
    """Paraphrase ``input`` with the selected checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_paraphrasing_tatoeba"`` selects the Tatoeba
            checkpoint; any other value selects the OpenSubtitles checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_paraphrasing_tatoeba"
        if model_choice == "turna_paraphrasing_tatoeba"
        else "boun-tabi-LMG/turna_paraphrasing_opensubtitles"
    )
    # The pipeline is built on every call and placed on device index 0.
    paraphraser = pipeline(model=checkpoint, device=0)
    outputs = paraphraser(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def summarize(input, model_choice="turna_summarization_tr_news"):
    """Summarize ``input`` with the selected checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.
        model_choice: ``"turna_summarization_tr_news"`` selects the TR-News
            checkpoint; any other value selects the MLSUM checkpoint.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    checkpoint = (
        "boun-tabi-LMG/turna_summarization_tr_news"
        if model_choice == "turna_summarization_tr_news"
        else "boun-tabi-LMG/turna_summarization_mlsum"
    )
    # The pipeline is built on every call and placed on device index 0.
    summarizer = pipeline(model=checkpoint, device=0)
    outputs = summarizer(input)
    return outputs[0]["generated_text"]
@spaces.GPU
def categorize(input):
    """Categorize ``input`` with the TTC4900 classification checkpoint.

    Args:
        input: Text handed to the pipeline unchanged.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    # The pipeline is built on every call and placed on device index 0.
    categorizer = pipeline(
        model="boun-tabi-LMG/turna_classification_ttc4900",
        device=0,
    )
    first_result = categorizer(input)[0]
    return first_result["generated_text"]
@spaces.GPU
def turna(input, max_new_tokens, length_penalty,
          top_k, top_p, temp, num_beams,
          do_sample, no_repeat_ngram_size, repetition_penalty):
    """Generate text with the pre-trained TURNA checkpoint.

    The prompt is wrapped as ``[S2S] {input}<EOS>`` before it is handed to
    the pipeline.

    Args:
        input: Raw prompt text.
        max_new_tokens: Forwarded as ``max_new_tokens`` (coerced to int).
        length_penalty: Forwarded as ``length_penalty``.
        top_k: Forwarded as ``top_k`` (coerced to int).
        top_p: Forwarded as ``top_p``.
        temp: Forwarded as ``temperature``.
        num_beams: Forwarded as ``num_beams`` (coerced to int).
        do_sample: Forwarded as ``do_sample``.
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size``
            (coerced to int).
        repetition_penalty: Forwarded as ``repetition_penalty``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    # The pipeline is built on every call and placed on device index 0.
    generator = pipeline(model="boun-tabi-LMG/TURNA", device=0)
    prompt = f"[S2S] {input}<EOS>"
    # These values come from UI sliders and may arrive as floats (possibly
    # fractional); the generation arguments below are counts, so truncate
    # them to integers before forwarding.
    outputs = generator(
        prompt,
        max_new_tokens=int(max_new_tokens),
        length_penalty=length_penalty,
        top_k=int(top_k),
        top_p=top_p,
        temperature=temp,
        num_beams=int(num_beams),
        do_sample=do_sample,
        no_repeat_ngram_size=int(no_repeat_ngram_size),
        repetition_penalty=repetition_penalty,
    )
    return outputs[0]["generated_text"]
# Build the demo UI: a header followed by one tab per task. Each tab wires a
# submit button to the matching inference function and offers a clickable
# example. Example rows carry only the input text, so the remaining input
# components keep their current values when an example is selected.
with gr.Blocks(theme="abidlabs/Lime") as demo:
    gr.Markdown("# TURNA")
    gr.Image("images/turna-logo.png", width=100)
    gr.Markdown(DESCRIPTION)

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sentiment_choice = gr.Radio(
                        choices=["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"],
                        label="Model",
                        value="turna_classification_17bintweet_sentiment",
                    )
                    sentiment_input = gr.Textbox(label="Sentiment Analysis Input")
                    sentiment_submit = gr.Button()
                sentiment_output = gr.Textbox(label="Sentiment Analysis Output")
            sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output)
            sentiment_examples = gr.Examples(examples=sentiment_example, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output, fn=sentiment_analysis)

    with gr.Tab("TURNA 🐦"):
        gr.Markdown("Pre-trained TURNA. Enter text to start generating.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label="Maximum length", minimum=0, maximum=512, value=128)
                        length_penalty = gr.Slider(label="Length penalty", value=1.0)
                        top_k = gr.Slider(label="Top-k", value=10)
                        # Top-p is a cumulative probability, so keep the
                        # slider inside (0, 1] with a fine-grained step.
                        top_p = gr.Slider(label="Top-p", minimum=0.01, maximum=1.0, step=0.01, value=0.95)
                        temp = gr.Slider(label="Temperature", value=1.0, minimum=0.1, maximum=100.0)
                        no_repeat_ngram_size = gr.Slider(label="No Repeat N-Gram Size", minimum=0, value=3)
                        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=0.0, value=3.1, step=0.1)
                        # Explicit step so the beam count is always a whole number.
                        num_beams = gr.Slider(label="Number of beams", minimum=1, maximum=10, step=1, value=3)
                        do_sample = gr.Radio(choices=[True, False], value=True, label="Sampling")
                with gr.Column():
                    text_gen_input = gr.Textbox(label="Text Generation Input")
                    text_gen_submit = gr.Button()
                    text_gen_output = gr.Textbox(label="Text Generation Output")
            # One list shared by the click handler and the examples widget so
            # the argument order passed to turna() cannot drift apart.
            text_gen_inputs = [
                text_gen_input, max_new_tokens, length_penalty,
                top_k, top_p, temp, num_beams,
                do_sample, no_repeat_ngram_size, repetition_penalty,
            ]
            text_gen_submit.click(turna, inputs=text_gen_inputs, outputs=text_gen_output)
            text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]]
            text_gen_examples = gr.Examples(examples=text_gen_example, inputs=text_gen_inputs, outputs=text_gen_output, fn=turna)

    with gr.Tab("Text Categorization"):
        gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(label="Text Categorization Input")
                    text_submit = gr.Button()
                text_output = gr.Textbox(label="Text Categorization Output")
            text_submit.click(categorize, inputs=[text_input], outputs=text_output)
            text_examples = gr.Examples(examples=text_category_example, inputs=[text_input], outputs=text_output, fn=categorize)

    with gr.Tab("NLI"):
        gr.Markdown("TURNA fine-tuned on natural language inference. Enter text to infer entailment and pick the model. You can also check for semantic similarity entailment.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    nli_choice = gr.Radio(
                        choices=["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"],
                        label="Model",
                        value="turna_nli_nli_tr",
                    )
                    nli_input = gr.Textbox(label="NLI Input")
                    nli_submit = gr.Button()
                nli_output = gr.Textbox(label="NLI Output")
            nli_submit.click(nli, inputs=[nli_input, nli_choice], outputs=nli_output)
            nli_examples = gr.Examples(examples=nli_example, inputs=[nli_input, nli_choice], outputs=nli_output, fn=nli)

    with gr.Tab("POS"):
        gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    pos_choice = gr.Radio(
                        choices=["turna_pos_imst", "turna_pos_boun"],
                        label="Model",
                        value="turna_pos_imst",
                    )
                    pos_input = gr.Textbox(label="POS Input")
                    pos_submit = gr.Button()
                pos_output = gr.Textbox(label="POS Output")
            pos_submit.click(pos, inputs=[pos_input, pos_choice], outputs=pos_output)
            # The POS tab reuses the NER example sentence.
            pos_examples = gr.Examples(examples=ner_example, inputs=[pos_input, pos_choice], outputs=pos_output, fn=pos)

    with gr.Tab("NER"):
        gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    ner_choice = gr.Radio(
                        choices=["turna_ner_wikiann", "turna_ner_milliyet"],
                        label="Model",
                        value="turna_ner_wikiann",
                    )
                    ner_input = gr.Textbox(label="NER Input")
                    ner_submit = gr.Button()
                ner_output = gr.Textbox(label="NER Output")
            ner_submit.click(ner, inputs=[ner_input, ner_choice], outputs=ner_output)
            ner_examples = gr.Examples(examples=ner_example, inputs=[ner_input, ner_choice], outputs=ner_output, fn=ner)

    with gr.Tab("Paraphrase"):
        gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    paraphrasing_choice = gr.Radio(
                        choices=["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"],
                        label="Model",
                        value="turna_paraphrasing_tatoeba",
                    )
                    paraphrasing_input = gr.Textbox(label="Paraphrasing Input")
                    paraphrasing_submit = gr.Button()
                paraphrasing_output = gr.Text(label="Paraphrasing Output")
            paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output)
            paraphrase_examples = gr.Examples(examples=long_text, inputs=[paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output, fn=paraphrase)

    with gr.Tab("Summarization"):
        gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sum_choice = gr.Radio(
                        choices=["turna_summarization_mlsum", "turna_summarization_tr_news"],
                        label="Model",
                        value="turna_summarization_mlsum",
                    )
                    sum_input = gr.Textbox(label="Summarization Input")
                    sum_submit = gr.Button()
                sum_output = gr.Textbox(label="Summarization Output")
            sum_submit.click(summarize, inputs=[sum_input, sum_choice], outputs=sum_output)
            sum_examples = gr.Examples(examples=long_text, inputs=[sum_input, sum_choice], outputs=sum_output, fn=summarize)

demo.launch()