yirmibesogluz commited on
Commit
ca81517
·
1 Parent(s): cfa4f27

Create home page

Browse files
Files changed (3) hide show
  1. app.py +5 -1
  2. apps/home.py +10 -79
  3. apps/turna.py +106 -0
app.py CHANGED
@@ -10,6 +10,8 @@ import apps.sentiment
10
  import apps.categorization
11
  import apps.ner
12
  import apps.pos_tagging
 
 
13
 
14
  st.set_page_config(
15
  page_title="Turna",
@@ -25,7 +27,9 @@ PAGES = {
25
  "Sentiment Classification": apps.sentiment,
26
  "Text Categorization": apps.categorization,
27
  "Named Entity Recognition": apps.ner,
28
- "Part-of-Speech Tagging": apps.pos_tagging
 
 
29
  }
30
 
31
  st.sidebar.title("Navigation")
 
10
  import apps.categorization
11
  import apps.ner
12
  import apps.pos_tagging
13
+ import apps.sts
14
+ import apps.turna
15
 
16
  st.set_page_config(
17
  page_title="Turna",
 
27
  "Sentiment Classification": apps.sentiment,
28
  "Text Categorization": apps.categorization,
29
  "Named Entity Recognition": apps.ner,
30
+ "Part-of-Speech Tagging": apps.pos_tagging,
31
+ "Semantic Textual Similarity": apps.sts,
32
+ "Text Generation": apps.turna,
33
  }
34
 
35
  st.sidebar.title("Navigation")
apps/home.py CHANGED
@@ -21,86 +21,17 @@ def write():
21
 
22
  st.markdown(
23
  """
24
-
25
- <h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>
26
-
27
- <p style="text-align:right;"><p>
28
- <p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
29
  <p style="text-align:right;"><p>
 
 
 
 
 
 
 
30
  """,
31
  unsafe_allow_html=True,
32
  )
33
-
34
- #st.title('Turkish Language Generation')
35
- #st.write('...with Turna')
36
-
37
- # Sidebar
38
-
39
- # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
40
- st.sidebar.subheader("Configurable parameters")
41
-
42
- max_new_tokens = st.sidebar.number_input(
43
- "Maximum length",
44
- min_value=0,
45
- max_value=512,
46
- value=128,
47
- help="The maximum length of the sequence to be generated.",
48
- )
49
- length_penalty = st.sidebar.number_input(
50
- "Length penalty",
51
- value=1.0,
52
- help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
53
- )
54
- do_sample = st.sidebar.selectbox(
55
- "Sampling?",
56
- (True, False),
57
- help="Whether or not to use sampling; use greedy decoding otherwise.",
58
- )
59
- num_beams = st.sidebar.number_input(
60
- "Number of beams",
61
- min_value=1,
62
- max_value=10,
63
- value=3,
64
- help="The number of beams to use for beam search.",
65
- )
66
- repetition_penalty = st.sidebar.number_input(
67
- "Repetition Penalty",
68
- min_value=0.0,
69
- value=3.0,
70
- step=0.1,
71
- help="The parameter for repetition penalty. 1.0 means no penalty",
72
- )
73
- no_repeat_ngram_size = st.sidebar.number_input(
74
- "No Repeat N-Gram Size",
75
- min_value=0,
76
- value=3,
77
- help="If set to int > 0, all ngrams of that size can only occur once.",
78
- )
79
- temp = st.sidebar.slider(
80
- "Temperature",
81
- value=1.0,
82
- min_value=0.1,
83
- max_value=100.0,
84
- help="The value used to module the next token probabilities.",
85
- )
86
- top_k = st.sidebar.number_input(
87
- "Top k",
88
- value=10,
89
- help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
90
- )
91
- top_p = st.sidebar.number_input(
92
- "Top p",
93
- value=0.95,
94
- help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
95
- )
96
-
97
- input_text = st.text_area(label='Enter a text: ', height=100,
98
- value="Türkiye'nin başkenti neresidir?")
99
- url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
100
- params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens,
101
- "do_sample":do_sample, "num_beams":num_beams, "repetition_penalty":repetition_penalty,
102
- "top_p":top_p, "top_k":top_k, "temperature":temp, "early_stopping": True, "max_length": 256}
103
- if st.button("Generate"):
104
- with st.spinner('Generating...'):
105
- output = query(f'[S2S] {input_text}<EOS>', url, params)
106
- st.success(output)
 
21
 
22
  st.markdown(
23
  """
24
+ <h3 style="text-align:left;">TURNA is a Turkish encoder-decoder language model.</h3>
 
 
 
 
25
  <p style="text-align:right;"><p>
26
+ <p style="text-align:left;">In this Huggingface space, you can test the TURNA language model. </p>
27
+ <p style="text-align:left;">The model contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains. </p>
28
+ <p style="text-align:left;">TURNA was fine-tuned to carry out Turkish summarization, paraphrasing, news title generation, sentiment classification, text categorization, named entity recognition, part-of-speech tagging, semantic textual similarity and natural language inference tasks. </p>
29
+ <p style="text-align:left;">Go to the <strong>Navigation</strong> bar to access our applications. </p>
30
+ <p style="text-align:left;">Refer to our <a href="https://arxiv.org/abs/2401.14373">paper</a> for more details... </p>
31
+ <p style="text-align:left;"></p>
32
+ <p style="text-align:right;"><em>TURNA can generate toxic content or provide erroneous information. Double-check before usage. </em></p>
33
  """,
34
  unsafe_allow_html=True,
35
  )
36
+
37
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
apps/turna.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import time
4
+ from transformers import pipeline
5
+ import os
6
+ from .utils import query
7
+
8
def write():
    """Render the TURNA text-generation page.

    Draws the page header and logo, exposes the generation hyperparameters
    in the Streamlit sidebar, and — when the user presses "Generate" —
    sends the prompt to the hosted TURNA model through the Hugging Face
    Inference API and displays the response.

    Side effects only (Streamlit UI + one HTTP request via ``query``);
    returns nothing.
    """
    st.markdown(
        """
        <h1 style="text-align:left;">TURNA</h1>
        """,
        unsafe_allow_html=True,
    )

    st.write("#")

    col = st.columns(2)

    col[0].image("images/turna-logo.png", width=100)

    st.markdown(
        """
        <h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>

        <p style="text-align:right;"></p>
        <p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
        <p style="text-align:right;"></p>
        """,
        unsafe_allow_html=True,
    )

    # Sidebar: generation hyperparameters, forwarded verbatim to the
    # Inference API. Layout adapted from
    # https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
    st.sidebar.subheader("Configurable parameters")

    max_new_tokens = st.sidebar.number_input(
        "Maximum length",
        min_value=0,
        max_value=512,
        value=128,
        help="The maximum length of the sequence to be generated.",
    )
    length_penalty = st.sidebar.number_input(
        "Length penalty",
        value=1.0,
        help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
    )
    do_sample = st.sidebar.selectbox(
        "Sampling?",
        (True, False),
        help="Whether or not to use sampling; use greedy decoding otherwise.",
    )
    num_beams = st.sidebar.number_input(
        "Number of beams",
        min_value=1,
        max_value=10,
        value=3,
        help="The number of beams to use for beam search.",
    )
    repetition_penalty = st.sidebar.number_input(
        "Repetition Penalty",
        min_value=0.0,
        value=3.0,
        step=0.1,
        help="The parameter for repetition penalty. 1.0 means no penalty",
    )
    no_repeat_ngram_size = st.sidebar.number_input(
        "No Repeat N-Gram Size",
        min_value=0,
        value=3,
        help="If set to int > 0, all ngrams of that size can only occur once.",
    )
    temp = st.sidebar.slider(
        "Temperature",
        value=1.0,
        min_value=0.1,
        max_value=100.0,
        help="The value used to modulate the next token probabilities.",
    )
    top_k = st.sidebar.number_input(
        "Top k",
        value=10,
        help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
    )
    top_p = st.sidebar.number_input(
        "Top p",
        value=0.95,
        help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
    )

    input_text = st.text_area(label='Enter a text: ', height=100,
                              value="Türkiye'nin başkenti neresidir?")
    url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
    params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens,
              "do_sample": do_sample, "num_beams": num_beams, "repetition_penalty": repetition_penalty,
              "top_p": top_p, "top_k": top_k, "temperature": temp, "early_stopping": True, "max_length": 256}
    if st.button("Generate"):
        with st.spinner('Generating...'):
            # TURNA expects a task prefix ([S2S]) and an explicit <EOS>
            # terminator around the raw prompt.
            output = query(f'[S2S] {input_text}<EOS>', url, params)
            st.success(output)