Spaces:
Runtime error
csabakecskemeti committed
Commit • b293ad4
1 Parent(s): 54929aa
Update app.py
app.py CHANGED

@@ -5,10 +5,60 @@ import json
 
 sbc_host_url = os.environ['URL']
 
-def get_completion(prompt:str, messages:str = '', n_predict=128):
-    system = "### System: You are a helpful assistant helps to brainstorm ideas.\n"
-    prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
+# def get_completion(prompt:str, messages:str = '', n_predict=128):
+#     system = "### System: You are a helpful assistant helps to brainstorm ideas.\n"
+#     prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
+
+#     headers = {
+#         "Content-Type": "application/json"
+#     }
+#     data = {
+#         "prompt": prompt_templated,
+#         "n_predict": n_predict,
+#         "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
+#         "stream": "True"
+#     }
+#     try:
+#         response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data))
+
+#         if response.status_code == 200:
+#             return response.json()['content']
+#         else:
+#             response.raise_for_status()
+#     except:
+#         raise gr.Warning("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
+
 
+# def chatty(prompt, messages):
+#     # print(prompt)
+#     # print(f'messages: {messages}')
+#     past_messages = ''
+#     if len(messages) > 0:
+#         for idx, message in enumerate(messages):
+#             print(f'idx: {idx}, message: {message}')
+#             past_messages += f'\n### HUMAN: {message[0]}'
+#             past_messages += f'\n### ASSISTANT: {message[1]}'
+
+
+#     # past_messages = messages[0][0]
+#     # print(f'past_messages: {past_messages}')
+#     messages = get_completion(prompt, past_messages)
+#     return messages.split('### ASSISTANT:')[-1]
+
+# stream
+def chatty(prompt, messages, n_predict=128):
+    # print(prompt)
+    # print(f'messages: {messages}')
+    past_messages = ''
+    if len(messages) > 0:
+        for idx, message in enumerate(messages):
+            print(f'idx: {idx}, message: {message}')
+            past_messages += f'\n### HUMAN: {message[0]}'
+            past_messages += f'\n### ASSISTANT: {message[1]}'
+
+    system = "### System: You help to brainstorm ideas.\n"
+    prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
+
     headers = {
         "Content-Type": "application/json"
     }
@@ -16,36 +66,28 @@ def get_completion(prompt:str, messages:str = '', n_predict=128):
         "prompt": prompt_templated,
         "n_predict": n_predict,
         "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
-        "stream": "True"
+        "stream": True
     }
+
+    result = ""
     try:
-        response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data))
+        response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data), stream=True)
 
         if response.status_code == 200:
-            return response.json()['content']
+            for line in response.iter_lines():
+                if line:
+                    try:
+                        result += json.loads(line.decode('utf-8').replace('data: ', ''))['content']
+                    except:
+                        # LMStudio response has empty token
+                        pass
+                    yield result
         else:
             response.raise_for_status()
-    except:
+    except requests.exceptions.RequestException as e:
         raise gr.Warning("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
 
 
-def chatty(prompt, messages):
-    # print(prompt)
-    # print(f'messages: {messages}')
-    past_messages = ''
-    if len(messages) > 0:
-        for idx, message in enumerate(messages):
-            print(f'idx: {idx}, message: {message}')
-            past_messages += f'\n### HUMAN: {message[0]}'
-            past_messages += f'\n### ASSISTANT: {message[1]}'
-
-
-    # past_messages = messages[0][0]
-    # print(f'past_messages: {past_messages}')
-    messages = get_completion(prompt, past_messages)
-    return messages.split('### ASSISTANT:')[-1]
-
-
 with gr.Blocks() as demo:
     gr.Image("sbc.jpg")
     gr.ChatInterface(
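
For reference, here is a minimal standalone sketch of the per-chunk parsing the rewritten chatty() performs, assuming the backend streams llama.cpp-style server-sent-event lines of the form data: {"content": "..."}. The helper parse_chunk and the sample byte strings below are illustrative, not part of this commit:

```python
import json

def parse_chunk(line: bytes) -> str:
    """Extract the token text from one streamed SSE line.

    Assumes llama.cpp-style lines, e.g. b'data: {"content": "Hel"}'.
    Lines that are not valid JSON (e.g. LM Studio keep-alives) yield ''.
    """
    try:
        payload = json.loads(line.decode('utf-8').removeprefix('data: '))
        return payload.get('content', '')
    except (UnicodeDecodeError, json.JSONDecodeError):
        return ''

# Accumulate partial output the way the new chatty() generator does:
result = ""
for line in [b'data: {"content": "Hel"}', b'', b'data: {"content": "lo"}']:
    if line:  # response.iter_lines() can emit empty keep-alive lines
        result += parse_chunk(line)
        # chatty() yields `result` here so Gradio can re-render the reply
print(result)  # -> Hello
```

Note that str.removeprefix (Python 3.9+) strips only the leading SSE marker, whereas the committed .replace('data: ', '') would also rewrite any literal 'data: ' that happens to appear inside generated text.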
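
The diff is truncated at gr.ChatInterface(. A plausible completion of that block, assuming chatty is wired in as the chat function (fn=chatty is a guess; the commit does not show the call's arguments). Because chatty is a generator, gr.ChatInterface streams each yielded partial result into the reply bubble:

```python
import gradio as gr

# Hypothetical completion of the truncated UI block; fn=chatty is an
# assumption, since the commit cuts off after "gr.ChatInterface(".
with gr.Blocks() as demo:
    gr.Image("sbc.jpg")
    gr.ChatInterface(fn=chatty)

demo.launch()
```

Worth flagging for a follow-up commit: the new chatty() interpolates messages (the raw history list) into prompt_templated rather than the past_messages string it just built, so the rendered prompt carries the Python repr of the history instead of the formatted ### HUMAN / ### ASSISTANT turns.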