Spaces: Running on Zero
updates
app.py CHANGED
@@ -6,6 +6,14 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import subprocess
+
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+
 
 DESCRIPTION = """\
 # Gemma 2 9B IT
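The added startup block is the usual ZeroGPU workaround for flash-attn: install the prebuilt wheel at runtime, with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE so no CUDA compilation is attempted on the build machine. One caveat: passing env= to subprocess.run replaces the child's entire environment rather than extending it; this often works in practice, but copying os.environ is the more robust pattern. A minimal sketch of that variant (not part of this commit):

import os
import subprocess

# Extend, rather than replace, the parent environment.
env = os.environ.copy()
env["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"  # use the prebuilt wheel, skip the CUDA build

subprocess.run(
    ["pip", "install", "flash-attn", "--no-build-isolation"],
    env=env,
    check=True,  # raise CalledProcessError instead of failing silently
)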
@@ -24,11 +32,12 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_id = "anakin87/Phi-3.5-mini-ITA"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
 )
 model.config.sliding_window = 4096
 model.eval()
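trust_remote_code=True lets transformers execute the custom modeling code shipped in the model repository, which Phi-3-family checkpoints have required with some transformers versions. Note that the commit installs flash-attn but never selects it explicitly, so whether it is used depends on the model's own code. A hedged sketch of making the choice explicit, assuming a transformers version that supports the attn_implementation argument:

# Sketch (not in this commit): fall back to eager attention when the
# flash-attn wheel failed to install or import.
try:
    import flash_attn  # noqa: F401
    attn_implementation = "flash_attention_2"
except ImportError:
    attn_implementation = "eager"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    attn_implementation=attn_implementation,
)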
@@ -39,10 +48,10 @@ def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
-    top_p: float = 0.9,
+    temperature: float = 0.001,
+    top_p: float = 1.0,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    repetition_penalty: float = 1.0,
 ) -> Iterator[str]:
     conversation = []
     for user, assistant in chat_history:
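The new defaults (temperature=0.001, top_p=1.0, repetition_penalty=1.0) keep the sampling code path but make decoding behave almost greedily, so answers are near-deterministic. For context, a sketch of how these parameters are typically forwarded in this Gradio template, assuming the standard TextIteratorStreamer plus background-thread pattern; input_ids stands in for the tokenized, truncated conversation prepared earlier in generate():

from threading import Thread

# Sketch of the downstream call; kwarg names are the standard
# transformers GenerationMixin.generate parameters.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,  # assumed: built from the chat template above
    streamer=streamer,
    max_new_tokens=max_new_tokens,
    do_sample=True,       # with temperature ~0 this is effectively greedy
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=repetition_penalty,
)
Thread(target=model.generate, kwargs=generate_kwargs).start()
# generate() then iterates over `streamer`, yielding the accumulated text.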
@@ -93,17 +102,17 @@ chat_interface = gr.ChatInterface(
         ),
         gr.Slider(
             label="Temperature",
-            minimum=0.1,
+            minimum=0,
             maximum=4.0,
             step=0.1,
-            value=0.6,
+            value=0.001,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=0.9,
+            value=1.0,
         ),
         gr.Slider(
             label="Top-k",
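Lowering the temperature slider's minimum from 0.1 to 0 lets users drag it all the way down, but transformers rejects temperature=0 when do_sample=True (the temperature warper requires a strictly positive float), which is presumably why the default lands on 0.001 rather than 0. A hypothetical guard that would make a literal 0 safe:

# Hypothetical (not in this commit): treat a zero/near-zero slider value as
# a request for greedy decoding instead of sampling.
do_sample = temperature > 1e-3
if not do_sample:
    temperature = None  # generate() ignores temperature when do_sample=False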
@@ -117,19 +126,20 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.0,
         ),
     ],
     stop_btn=None,
     examples=[
         ["Ciao! Come stai?"],
-        ["
-        ["
-        ["
+        ["Pro e contro di una relazione a lungo termine. Elenco puntato con max 3 pro e 3 contro sintetici."],
+        ["Quante ore impiega un uomo per mangiare un elicottero?"],
+        ["Come si apre un file JSON in Python?"],
+        ["Fammi un elenco puntato dei pro e contro di vivere in Italia. Massimo 2 pro e 2 contro."],
+        ["Inventa una breve storia con animali sul valore dell'amicizia."],
         ["Scrivi un articolo di 100 parole sui 'Benefici dell'open-source nella ricerca sull'intelligenza artificiale'"],
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
|