Spaces:

boun-tabi-LMG
/

TURNA-GPU

Sleeping

File size: 20,169 Bytes

8f59407
 
f9b8716
8f59407
 
ee7626a
8f59407
ba2a3ae
 
 
 
 
 
 
 
 
7d6f3d4
3255b79
 
 
 
 
 
 
 
 
 
 
 
ba2a3ae
3255b79
 
 
ba2a3ae
 
 
 
9d25f83
ba2a3ae
 
 
 
 
 
 
 
8f59407
 
 
 
6c19101
8f59407
 
38c66c7
123980d
38dd92a
ba01e7c
09834f8
ddb7329
38c66c7
0325d36
ddb7329
453f168
ca9e330
c629a2b
ddb7329
38c66c7
ca9e330
09834f8
c629a2b
f9b8716
0325d36
f9b8716
ca9e330
6c19101
 
f9b8716
 
ca68a3c
09834f8
c629a2b
ca68a3c
09834f8
c629a2b
ca68a3c
f9b8716
 
1469e49
09834f8
c629a2b
1469e49
f9b8716
c629a2b
1469e49
f9b8716
 
 
496bd7a
f9b8716
0325d36
496bd7a
f9b8716
0325d36
 
496bd7a
f9b8716
 
c93dad0
6805c60
 
 
 
8f59407
cce4feb
 
 
 
 
 
 
 
123980d
 
 
8f59407
1469e49
8f59407
a26ce70
b42a3ee
 
95a7d48
 
 
0598d8e
a26ce70
26bcd5f
 
aab3062
a26ce70
 
123980d
5daac23
7d6f3d4
f08b866
ba2a3ae
8c5ac11
5daac23
ba2a3ae
0325d36
 
 
 
 
 
 
 
 
 
5c594a5
 
a26ce70
123980d
 
 
 
 
 
 
 
 
272ce33
978d584
123980d
0325d36
38c66c7
 
0325d36
 
 
 
38c66c7
 
 
0325d36
 
38c66c7
 
0325d36
09834f8
 
 
 
ecffc39
0325d36
ca68a3c
 
 
 
 
 
 
 
 
 
 
ecffc39
 
0325d36
ca68a3c
 
09834f8
 
 
 
 
ecffc39
0325d36
1469e49
 
 
 
 
 
 
 
 
 
 
ecffc39
 
c629a2b
 
1469e49
 
09834f8
 
 
 
ecffc39
0325d36
496bd7a
 
 
 
 
 
e3def2b
 
0325d36
c629a2b
496bd7a
 
f9b8716
09834f8
f9b8716
 
ecffc39
0325d36
6805c60
 
 
 
 
 
 
 
 
 
 
ecffc39
 
c629a2b
 
6805c60
 
3255b79
cce4feb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5daac23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95a7d48
5daac23
 
 
 
 
 
 
95a7d48
5daac23
 
95a7d48
5daac23
3255b79
 
8f59407

import gradio as gr
import spaces
from transformers import pipeline
import torch

# Markdown body of the landing "TURNA" tab: a short model overview, the list of
# fine-tuned tasks exposed by the demo, and usage caveats. It is rendered as-is
# by gr.Markdown, so whitespace and Markdown markers inside the literal matter.
DESCRIPTION="""

### a Turkish encoder-decoder language model 

Welcome to our Huggingface space, where you can explore the capabilities of TURNA. 
			
**Key Features of TURNA:**

- **Powerful Architecture:** TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains.
- **Diverse Training Data:** Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains.
- **Broad Applications:** TURNA is fine-tuned for a variety of generation and understanding tasks, including:
    - Summarization
    - Paraphrasing
    - News title generation
    - Sentiment classification
    - Text categorization
    - Named entity recognition
    - Part-of-speech tagging
    - Semantic textual similarity
    - Natural language inference

**Note:** First inference might take time as the models are downloaded on-the-go.

*TURNA can generate toxic content or provide erroneous information. Double-check before usage.*

"""

# Markdown footer rendered below all tabs: a link to the paper followed by a
# fenced BibTeX entry that visitors can copy.
CITATION = """
Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details.

### Citation
```bibtex
@misc{uludogan2024turna,
	title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation}, 
	author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı},
	year={2024},
	eprint={2401.14373},
	archivePrefix={arXiv},
	primaryClass={cs.CL}
}
```
"""


# Example rows fed to gr.Examples. Each inner list is one example row whose
# entries fill the leading input components of the corresponding tab.

# Single sentence used by the "Sentiment Analysis" tab.
sentiment_example = [["Bu üründen çok memnun kaldım."]]
# Two longer passages shared by the "Paraphrase", "Summarization" and
# "Title Generation" tabs.
long_text = [["Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır."], ["Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor."]]
# Short sentence used by both the "POS" and the "NER" tabs.
ner_example = [["Benim adım Turna."]]
# NOTE(review): not referenced anywhere in the visible part of this file —
# confirm whether it is still needed.
t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
# Sentence pair (first sentence, second sentence) for the "NLI & STS" tab.
nli_example = [["Bunu çok beğendim.", "Bunu çok sevdim."]]
# Lower-cased news snippet used by the "Text Categorization" tab.
text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]



@spaces.GPU
def nli(first_input, second_input, model_choice="turna_nli_nli_tr"):
    """Run the NLI or the semantic-similarity model on a sentence pair.

    Args:
        first_input: First sentence of the pair.
        second_input: Second sentence of the pair.
        model_choice: ``"turna_nli_nli_tr"`` selects the NLI checkpoint; any
            other value selects the STS-B semantic-similarity checkpoint.

    Returns:
        The ``generated_text`` field of the first pipeline output.
    """
    # Each checkpoint is prompted with its own pair of field labels.
    if model_choice == "turna_nli_nli_tr":
        repo_id = "boun-tabi-LMG/turna_nli_nli_tr"
        prompt = f"hipotez: {first_input} önerme: {second_input}"
    else:
        repo_id = "boun-tabi-LMG/turna_semantic_similarity_stsb_tr"
        prompt = f"ilk cümle: {first_input} ikinci cümle: {second_input}"

    # The pipeline is built per call and placed on device 0.
    generator = pipeline(model=repo_id, device=0)
    outputs = generator(prompt)
    return outputs[0]["generated_text"]


@spaces.GPU
def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
    """Classify the sentiment of ``input`` with the selected checkpoint.

    Args:
        input: Text to classify.
        model_choice: Repository name under the ``boun-tabi-LMG`` namespace.

    Returns:
        The ``generated_text`` field of the first pipeline output, generated
        with at most 4 new tokens.
    """
    repo_id = f"boun-tabi-LMG/{model_choice}"
    classifier = pipeline(model=repo_id, device=0)
    outputs = classifier(input, max_new_tokens=4)
    return outputs[0]["generated_text"]

@spaces.GPU
def pos(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Tag parts of speech in ``input`` with the selected checkpoint.

    Args:
        input: Text to tag.
        model_choice: ``"turna_pos_imst"`` selects the IMST checkpoint; any
            other value selects the BOUN checkpoint.
        max_new_tokens: Forwarded to the pipeline call unchanged.
        length_penalty: Forwarded to the pipeline call unchanged.
        no_repeat_ngram_size: Forwarded to the pipeline call unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline output.
    """
    # Both branches share the same call; only the checkpoint differs.
    repo_id = (
        "boun-tabi-LMG/turna_pos_imst"
        if model_choice == "turna_pos_imst"
        else "boun-tabi-LMG/turna_pos_boun"
    )
    tagger = pipeline(model=repo_id, device=0)
    outputs = tagger(
        input,
        max_new_tokens=max_new_tokens,
        length_penalty=length_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
    )
    return outputs[0]["generated_text"]

@spaces.GPU
def ner(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Extract named entities from ``input`` with the selected checkpoint.

    Args:
        input: Text to analyse.
        model_choice: ``"turna_ner_wikiann"`` selects the WikiANN checkpoint;
            any other value selects the Milliyet checkpoint.
        max_new_tokens: Forwarded to the pipeline call unchanged.
        length_penalty: Forwarded to the pipeline call unchanged.
        no_repeat_ngram_size: Forwarded to the pipeline call unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline output.
    """
    if model_choice == "turna_ner_wikiann":
        repo_id = "boun-tabi-LMG/turna_ner_wikiann"
    else:
        repo_id = "boun-tabi-LMG/turna_ner_milliyet"

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "length_penalty": length_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
    }
    recognizer = pipeline(model=repo_id, device=0)
    outputs = recognizer(input, **generation_kwargs)
    return outputs[0]["generated_text"]


@spaces.GPU
def paraphrase(input, model_choice, max_new_tokens):
    """Paraphrase ``input`` with the selected checkpoint.

    Args:
        input: Text to paraphrase.
        model_choice: ``"turna_paraphrasing_tatoeba"`` selects the Tatoeba
            checkpoint; any other value selects the OpenSubtitles checkpoint.
        max_new_tokens: Forwarded to the pipeline call unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline output.
    """
    use_tatoeba = model_choice == "turna_paraphrasing_tatoeba"
    if use_tatoeba:
        repo_id = "boun-tabi-LMG/turna_paraphrasing_tatoeba"
    else:
        repo_id = "boun-tabi-LMG/turna_paraphrasing_opensubtitles"

    rewriter = pipeline(model=repo_id, device=0)
    outputs = rewriter(input, max_new_tokens=max_new_tokens)
    return outputs[0]["generated_text"]
        
@spaces.GPU
def summarize(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Summarize ``input`` with the selected checkpoint.

    Args:
        input: Text to summarize.
        model_choice: Key of one of the two supported summarization models.
        max_new_tokens: Forwarded to the pipeline call unchanged.
        length_penalty: Forwarded to the pipeline call unchanged.
        no_repeat_ngram_size: Forwarded to the pipeline call unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline output.

    Raises:
        KeyError: If ``model_choice`` is not one of the supported keys.
    """
    repo_by_choice = {
        "turna_summarization_tr_news": "boun-tabi-LMG/turna_summarization_tr_news",
        "turna_summarization_mlsum": "boun-tabi-LMG/turna_summarization_mlsum",
    }
    repo_id = repo_by_choice[model_choice]

    summarizer = pipeline(model=repo_id, device=0)
    outputs = summarizer(
        input,
        max_new_tokens=max_new_tokens,
        length_penalty=length_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
    )
    return outputs[0]["generated_text"]

@spaces.GPU
def generate_title(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Generate a news title for ``input`` with the selected checkpoint.

    Args:
        input: News text to generate a title for.
        model_choice: Key of one of the two supported title-generation models.
        max_new_tokens: Forwarded to the pipeline call unchanged.
        length_penalty: Forwarded to the pipeline call unchanged.
        no_repeat_ngram_size: Forwarded to the pipeline call unchanged.

    Returns:
        The ``generated_text`` field of the first pipeline output.

    Raises:
        KeyError: If ``model_choice`` is not one of the supported keys.
    """
    repo_by_choice = {
        "turna_title_generation_tr_news": "boun-tabi-LMG/turna_title_generation_tr_news",
        "turna_title_generation_mlsum": "boun-tabi-LMG/turna_title_generation_mlsum",
    }
    title_model = pipeline(model=repo_by_choice[model_choice], device=0)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "length_penalty": length_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
    }
    outputs = title_model(input, **generation_kwargs)
    return outputs[0]["generated_text"]


@spaces.GPU
def categorize(input):
    """Predict the category of ``input`` with the TTC4900 checkpoint.

    Args:
        input: Text to categorize.

    Returns:
        The ``generated_text`` field of the first pipeline output, generated
        with at most 8 new tokens.
    """
    repo_id = "boun-tabi-LMG/turna_classification_ttc4900"
    categorizer = pipeline(model=repo_id, device=0)
    outputs = categorizer(input, max_new_tokens=8)
    return outputs[0]["generated_text"]

@spaces.GPU
def turna(input, max_new_tokens, length_penalty,
          top_k, top_p, temp, num_beams,
          do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version):
    """Continue ``input`` with one of the pre-trained TURNA checkpoints.

    The text is wrapped as ``"[S2S] {input}<EOS>"`` before being passed to the
    model.

    Args:
        input: Text to continue.
        max_new_tokens: Upper bound on generated tokens; truncated to an int.
        length_penalty: Forwarded to the pipeline call unchanged.
        top_k: Top-k sampling cut-off; truncated to an int.
        top_p: Forwarded to the pipeline call unchanged.
        temp: Forwarded to the pipeline call as ``temperature``.
        num_beams: Number of beams; truncated to an int.
        do_sample: Forwarded to the pipeline call unchanged.
        no_repeat_ngram_size: N-gram size; truncated to an int.
        repetition_penalty: Forwarded to the pipeline call unchanged.
        turna_model_version: Repository name under the ``boun-tabi-LMG``
            namespace.

    Returns:
        The ``generated_text`` field of the first pipeline output.
    """
    # Named `generator` rather than `turna` so the local does not shadow this
    # function's own name.
    generator = pipeline(model=f"boun-tabi-LMG/{turna_model_version}", device=0)
    prompt = f"[S2S] {input}<EOS>"

    # Slider widgets can hand over fractional numbers (e.g. 3.47 beams), while
    # these four settings are counts, so they are coerced to integers here.
    outputs = generator(
        prompt,
        max_new_tokens=int(max_new_tokens),
        length_penalty=length_penalty,
        top_k=int(top_k),
        top_p=top_p,
        temperature=temp,
        num_beams=int(num_beams),
        do_sample=do_sample,
        no_repeat_ngram_size=int(no_repeat_ngram_size),
        repetition_penalty=repetition_penalty,
    )
    return outputs[0]["generated_text"]


def _max_length_slider(maximum, value):
    """Build a "Maximum length" slider that only yields whole token counts.

    ``step=1`` is passed explicitly because Gradio otherwise derives the step
    from the slider range, which gives fractional steps for small ranges such
    as 0-64 or 0-20. The minimum is 1 because a request for zero new tokens
    cannot produce any output.
    """
    return gr.Slider(label="Maximum length", minimum=1, maximum=maximum, value=value, step=1)


def _generation_sliders(max_length, default_length):
    """Create the three sliders shared by the POS, NER, summarization and title tabs.

    Must be called inside the layout context where the sliders should appear.

    Returns:
        ``[max_new_tokens, length_penalty, no_repeat_ngram_size]`` in the
        order the backend functions expect them.
    """
    max_new_tokens = _max_length_slider(max_length, default_length)
    length_penalty = gr.Slider(label="Length penalty", minimum=-10, maximum=10, value=2.0)
    # An n-gram size is a count, so keep the slider on whole numbers.
    no_repeat_ngram_size = gr.Slider(label="No Repeat N-Gram Size", minimum=0, value=3, step=1)
    return [max_new_tokens, length_penalty, no_repeat_ngram_size]


# Top-level UI: one tab per task, each wiring its inputs to the matching
# backend function, followed by the citation footer.
with gr.Blocks(theme="abidlabs/Lime") as demo:

    gr.Markdown("# TURNA")
    gr.Image("images/turna-logo.png", width=100, show_label=False, show_download_button=False, show_share_button=False)

    with gr.Tab("TURNA"):
        gr.Markdown(DESCRIPTION)

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sentiment_choice = gr.Radio(choices=["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"], label="Model", value="turna_classification_17bintweet_sentiment")
                    sentiment_input = gr.Textbox(label="Sentiment Analysis Input")

                    sentiment_submit = gr.Button()
                sentiment_output = gr.Textbox(label="Sentiment Analysis Output")
                sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output)
            sentiment_examples = gr.Examples(examples=sentiment_example, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output, fn=sentiment_analysis)

    with gr.Tab("Text Categorization"):
        gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(label="Text Categorization Input")

                    text_submit = gr.Button()
                text_output = gr.Textbox(label="Text Categorization Output")
                text_submit.click(categorize, inputs=[text_input], outputs=text_output)
            text_examples = gr.Examples(examples=text_category_example, inputs=[text_input], outputs=text_output, fn=categorize)

    with gr.Tab("NLI & STS"):
        gr.Markdown("TURNA fine-tuned on natural language inference or semantic textual similarity. Enter text to infer entailment or measure semantic similarity. ")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    nli_choice = gr.Radio(choices=["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"], label="Model", value="turna_nli_nli_tr")
                    nli_first_input = gr.Textbox(label="First Sentence")
                    nli_second_input = gr.Textbox(label="Second Sentence")

                    nli_submit = gr.Button()
                nli_output = gr.Textbox(label="NLI Output")
                nli_submit.click(nli, inputs=[nli_first_input, nli_second_input, nli_choice], outputs=nli_output)
            nli_examples = gr.Examples(examples=nli_example, inputs=[nli_first_input, nli_second_input, nli_choice], outputs=nli_output, fn=nli)

    with gr.Tab("POS"):
        gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    pos_choice = gr.Radio(choices=["turna_pos_imst", "turna_pos_boun"], label="Model", value="turna_pos_imst")
                    with gr.Accordion("Advanced Generation Parameters"):
                        pos_params = _generation_sliders(64, 64)
                with gr.Column():
                    pos_input = gr.Textbox(label="POS Input")
                    pos_submit = gr.Button()
                pos_output = gr.Textbox(label="POS Output")
                pos_submit.click(pos, inputs=[pos_input, pos_choice, *pos_params], outputs=pos_output)
            pos_examples = gr.Examples(examples=ner_example, inputs=[pos_input, pos_choice, *pos_params], outputs=pos_output, fn=pos)

    with gr.Tab("NER"):
        gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    ner_choice = gr.Radio(choices=["turna_ner_wikiann", "turna_ner_milliyet"], label="Model", value="turna_ner_wikiann")
                    with gr.Accordion("Advanced Generation Parameters"):
                        ner_params = _generation_sliders(64, 64)
                with gr.Column():
                    ner_input = gr.Textbox(label="NER Input")
                    ner_submit = gr.Button()
                ner_output = gr.Textbox(label="NER Output")

                ner_submit.click(ner, inputs=[ner_input, ner_choice, *ner_params], outputs=ner_output)
            ner_examples = gr.Examples(examples=ner_example, inputs=[ner_input, ner_choice, *ner_params], outputs=ner_output, fn=ner)

    with gr.Tab("Paraphrase"):
        gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    paraphrasing_choice = gr.Radio(choices=["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"], label="Model", value="turna_paraphrasing_tatoeba")
                    with gr.Accordion("Advanced Generation Parameters"):
                        paraphrasing_max_new_tokens = _max_length_slider(20, 20)
                with gr.Column():
                    paraphrasing_input = gr.Textbox(label="Paraphrasing Input")
                    paraphrasing_submit = gr.Button()
                paraphrasing_output = gr.Text(label="Paraphrasing Output")

            paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice, paraphrasing_max_new_tokens], outputs=paraphrasing_output)
            # NOTE(review): these examples are the long passages shared with the
            # summarization tabs while the length cap here is 20 tokens —
            # confirm that this is intended.
            paraphrase_examples = gr.Examples(examples=long_text, inputs=[paraphrasing_input, paraphrasing_choice, paraphrasing_max_new_tokens], outputs=paraphrasing_output, fn=paraphrase)

    with gr.Tab("Summarization"):
        gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sum_choice = gr.Radio(choices=["turna_summarization_mlsum", "turna_summarization_tr_news"], label="Model", value="turna_summarization_mlsum")
                    with gr.Accordion("Advanced Generation Parameters"):
                        sum_params = _generation_sliders(512, 128)
                with gr.Column():
                    sum_input = gr.Textbox(label="Summarization Input")
                    sum_submit = gr.Button()
                sum_output = gr.Textbox(label="Summarization Output")

                sum_submit.click(summarize, inputs=[sum_input, sum_choice, *sum_params], outputs=sum_output)
            sum_examples = gr.Examples(examples=long_text, inputs=[sum_input, sum_choice, *sum_params], outputs=sum_output, fn=summarize)

    with gr.Tab("Title Generation"):
        gr.Markdown("TURNA fine-tuned on news title generation. Enter news text to generate a title.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    title_choice = gr.Radio(choices=["turna_title_generation_tr_news", "turna_title_generation_mlsum"], label="Model", value="turna_title_generation_tr_news")
                    with gr.Accordion("Advanced Generation Parameters"):
                        title_params = _generation_sliders(64, 64)
                with gr.Column():
                    title_input = gr.Textbox(label="News Title Generation Input")
                    title_submit = gr.Button()
                title_output = gr.Textbox(label="News Title Generation Output")

                title_submit.click(generate_title, inputs=[title_input, title_choice, *title_params], outputs=title_output)
            title_examples = gr.Examples(examples=long_text, inputs=[title_input, title_choice, *title_params], outputs=title_output, fn=generate_title)

    with gr.Tab("Text Generation"):
        gr.Markdown("Pre-trained TURNA. Enter text to start generating.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = _max_length_slider(512, 128)
                        length_penalty = gr.Slider(label="Length penalty",
                                                   value=1.0)
                        # Top-k is a count: keep it on whole numbers.
                        top_k = gr.Slider(label="Top-k", minimum=0, maximum=100, value=10, step=1)
                        # Top-p is a probability mass, so the slider is bounded
                        # to [0, 1] instead of the default 0-100 range.
                        top_p = gr.Slider(label="Top-p", minimum=0.0, maximum=1.0, value=0.95, step=0.01)
                        temp = gr.Slider(label="Temperature", value=1.0, minimum=0.1, maximum=100.0)
                        no_repeat_ngram_size = gr.Slider(label="No Repeat N-Gram Size", minimum=0, value=3, step=1)
                        # A repetition penalty of 0 is not a valid setting, so
                        # the slider starts at the first positive step.
                        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=0.1, value=3.1, step=0.1)
                        # Without an explicit step the 1-10 range would get a
                        # 0.01 step and produce fractional beam counts.
                        num_beams = gr.Slider(label="Number of beams", minimum=1,
                                              maximum=10, value=3, step=1)
                        do_sample = gr.Radio(choices=[True, False], value=True, label="Sampling")
                        turna_model_version = gr.Radio(choices=["TURNA", "TURNA-2850K", "TURNA-4350K"], value="TURNA", label="Choose TURNA model version")
                with gr.Column():
                    text_gen_input = gr.Textbox(label="Text Generation Input")

                    text_gen_submit = gr.Button()
                text_gen_output = gr.Textbox(label="Text Generation Output")
            text_gen_submit.click(turna, inputs=[text_gen_input, max_new_tokens, length_penalty,
                                    top_k, top_p, temp, num_beams,
                                    do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output)
            text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]]
            text_gen_examples = gr.Examples(examples=text_gen_example, inputs=[text_gen_input, max_new_tokens, length_penalty,
                                    top_k, top_p, temp, num_beams, do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output, fn=turna)

    gr.Markdown(CITATION)

demo.launch()