Update app.py
app.py CHANGED
@@ -8,16 +8,16 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 DESCRIPTION = """\
-# SILMA Kashif 2B Instruct
+# SILMA Kashif 2B Instruct v1.0 Playground
 
 This is a demo of [`silma-ai/SILMA-Kashif-2B-Instruct-v1.0`](https://huggingface.co/silma-ai/SILMA-Kashif-2B-Instruct-v1.0).
 
-** NOTE:
+** NOTE: Kashif is a RAG model, it is only trained to answer questions based on context.
 """
 
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "10096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -37,7 +37,7 @@ def generate(
     message: str,
     chat_history: list[dict],
     max_new_tokens: int = 1024,
-    temperature: float = 0.
+    temperature: float = 0.01,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
@@ -115,9 +115,6 @@ demo = gr.ChatInterface(
     examples=[
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
     cache_examples=False,
     type="messages",
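The diff does not show how MAX_INPUT_TOKEN_LENGTH and the generate() defaults are actually consumed. Below is a minimal sketch of the standard Hugging Face chat-Space pattern this app appears to follow, with the values from this commit filled in; the generate() body, model-loading details, and warning text are assumptions, not the Space's actual code.

from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = 10096  # value set in this commit

model_id = "silma-ai/SILMA-Kashif-2B-Instruct-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)


def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    temperature: float = 0.01,  # new default from this commit: near-greedy decoding
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
):
    conversation = [*chat_history, {"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    # Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens so long chats
    # do not overflow the model's context window.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input as it exceeded {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(device)

    # Stream tokens back to the UI while model.generate runs on a worker thread.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)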
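The NOTE added here ("Kashif is a RAG model, it is only trained to answer questions based on context") also explains the temperature change: 0.01 makes sampling effectively greedy, which suits extractive, context-grounded answering over creative generation. It likewise explains dropping the three open-ended examples (Cinderella, the helicopter riddle, the 100-word article), which have no supporting context. A hypothetical way to exercise the generate() sketch above with inline context follows; the exact prompt format Kashif expects is not shown in this diff.

# Hypothetical usage: the question ships together with its supporting passage,
# since the model is trained to answer only from provided context.
context = "The Eiffel Tower is in Paris. It was completed in 1889."
question = "When was the Eiffel Tower completed?"
message = f"Context: {context}\n\nQuestion: {question}"

answer = ""
for partial in generate(message, chat_history=[]):
    answer = partial  # each yield is the full text generated so far
print(answer)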