Spaces:

xianbao
/

SambaNova-fast

Runtime error

App Files Files Community

Xianbao QIAN committed on Aug 3, 2024

Commit

93e4eee

1 Parent(s): d76943e

update new ui

Browse files

Files changed (1) hide show

app.py +88 -30

app.py CHANGED Viewed

@@ -7,41 +7,102 @@ import sambanova
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    conversation = []
-    for user, assistant in chat_history:
-        conversation.extend(
-            [
-                {"role": "user", "content": user},
-                {"role": "assistant", "content": assistant},
-            ]
-        )
-    conversation.append({"role": "user", "content": message})
     outputs = []
-    for text in sambanova.Streamer(conversation, new_tokens=max_new_tokens,
-                                   temperature=temperature, top_k=top_k, top_p=top_p):
         outputs.append(text)
         yield "".join(outputs)
-MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 chat_interface = gr.ChatInterface(
-    fn=generate,
     additional_inputs=[
         gr.Slider(
-            label="Max new tokens",
             minimum=1,
-            maximum=MAX_MAX_NEW_TOKENS,
             step=1,
-            value=DEFAULT_MAX_NEW_TOKENS,
         ),
         gr.Slider(
             label="Temperature",
@@ -64,29 +125,26 @@ chat_interface = gr.ChatInterface(
             step=1,
             value=50,
         ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.2,
-        ),
     ],
-    stop_btn=None,
-    fill_height=True,
     examples=[
         ["Which one is bigger? 4.9 or 4.11"],
-        ["Can you explain briefly to me what is the Python programming language?"],
         ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
     cache_examples=False,
 )
 with gr.Blocks() as demo:
     gr.Markdown('# Sambanova model inference LLAMA 405B')
     chat_interface.render()
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()

 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
+    system_message: str,
+    max_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    """Stream a reply to ``message`` from ``sambanova.Streamer``.
+
+    The conversation sent to the streamer is the system message, then the
+    non-empty user/assistant turns from ``chat_history``, then the new user
+    ``message``.
+
+    Args:
+        message: The new user prompt to answer.
+        chat_history: Earlier ``(user, assistant)`` turns; empty entries
+            are skipped.
+        system_message: Content of the leading ``system`` role entry.
+        max_tokens: Forwarded to the streamer as ``new_tokens``.
+        temperature: Forwarded to the streamer.
+        top_p: Forwarded to the streamer.
+        top_k: Forwarded to the streamer.
+        repetition_penalty: Accepted for interface compatibility; it is
+            not forwarded to the streamer.
+
+    Yields:
+        The text accumulated so far, re-joined after every streamed chunk.
+    """
+    conversation = [{"role": "system", "content": system_message}]
+    for val in chat_history:
+        if val[0]:
+            conversation.append({"role": "user", "content": val[0]})
+        if val[1]:
+            conversation.append({"role": "assistant", "content": val[1]})
+    # The current prompt is not part of chat_history, so it must be added
+    # explicitly; without it the model never sees the new user message.
+    conversation.append({"role": "user", "content": message})
     outputs = []
+    for text in sambanova.Streamer(conversation,
+                                   new_tokens=max_tokens,
+                                   temperature=temperature,
+                                   top_k=top_k,
+                                   top_p=top_p):
         outputs.append(text)
         yield "".join(outputs)
+MAX_MAX_TOKENS = 2048
+DEFAULT_MAX_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+# chat_interface = gr.ChatInterface(
+#     fn=generate,
+#     additional_inputs=[
+#         gr.Slider(
+#             label="Max new tokens",
+#             minimum=1,
+#             maximum=MAX_MAX_NEW_TOKENS,
+#             step=1,
+#             value=DEFAULT_MAX_NEW_TOKENS,
+#         ),
+#         gr.Slider(
+#             label="Temperature",
+#             minimum=0.1,
+#             maximum=4.0,
+#             step=0.1,
+#             value=0.6,
+#         ),
+#         gr.Slider(
+#             label="Top-p (nucleus sampling)",
+#             minimum=0.05,
+#             maximum=1.0,
+#             step=0.05,
+#             value=0.9,
+#         ),
+#         gr.Slider(
+#             label="Top-k",
+#             minimum=1,
+#             maximum=1000,
+#             step=1,
+#             value=50,
+#         ),
+#         gr.Slider(
+#             label="Repetition penalty",
+#             minimum=1.0,
+#             maximum=2.0,
+#             step=0.05,
+#             value=1.2,
+#         ),
+#     ],
+#     stop_btn=None,
+#     fill_height=True,
+#     examples=[
+#         ["Which one is bigger? 4.9 or 4.11"],
+#         [
+#             "Can you explain briefly to me what is the Python programming language?"
+#         ],
+#         ["Explain the plot of Cinderella in a sentence."],
+#         ["How many hours does it take a man to eat a Helicopter?"],
+#         [
+#             "Write a 100-word article on 'Benefits of Open-Source in AI research'"
+#         ],
+#     ],
+#     cache_examples=False,
+# )
 chat_interface = gr.ChatInterface(
+    generate,
     additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.",
+                   label="System message"),
         gr.Slider(
+            label="Max tokens",
             minimum=1,
+            maximum=MAX_MAX_TOKENS,
             step=1,
+            value=DEFAULT_MAX_TOKENS,
         ),
         gr.Slider(
             label="Temperature",
             step=1,
             value=50,
         ),
     ],
     examples=[
         ["Which one is bigger? 4.9 or 4.11"],
+        [
+            "Can you explain briefly to me what is the Python programming language?"
+        ],
         ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
+        [
+            "Write a 100-word article on 'Benefits of Open-Source in AI research'"
+        ],
     ],
     cache_examples=False,
 )
 with gr.Blocks() as demo:
     gr.Markdown('# Sambanova model inference LLAMA 405B')
     chat_interface.render()
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()