Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import os
 import threading
 import time
@@ -18,19 +17,16 @@ OLLAMA_SERVICE_THREAD.start()
 
 print("Giving ollama serve a moment")
 time.sleep(10)
-subprocess.run("~/ollama
+subprocess.run("~/ollama pull gemma2", shell=True)
 
 
 import copy
 import gradio as gr
-import
-from llama_index.llms.ollama import Ollama
-import llama_index
-from llama_index.core.llms import ChatMessage
+import ollama
 
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = "google/gemma-2-
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
 MODEL_NAME = MODEL_ID.split("/")[-1]
 
 TITLE = "<h1><center>Chatbox</center></h1>"
@@ -56,33 +52,30 @@ h3 {
     text-align: center;
 }
 """
-@spaces.GPU()
 def stream_chat(message: str, history: list, temperature: float, context_window: int, top_p: float, top_k: int, penalty: float):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     for prompt, answer in history:
         conversation.extend([
-            ChatMessage(
-                role="user", content=prompt
-            ),
-            ChatMessage(role="assistant", content=answer),
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
         ])
-    conversation.append(ChatMessage(role="user", content=message))
 
+    conversation.append({"role": "user", "content": message})
 
     print(f"Conversation is -\n{conversation}")
 
-
-
-        messages
-
-        top_p=top_p,
-        top_k=top_k,
-        repeat_penalty=penalty,
-        context_window=context_window,
+    response = ollama.chat(
+        model="gemma2",
+        messages=conversation,
+        stream=True,
     )
-
-
+
+    message = ""
+    for chunk in response:
+        message += chunk["message"]["content"]
+        yield "", message
+
 
 
 chatbot = gr.Chatbot(height=600)
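Read as one change, the hunks drop the llama_index wrapper (Ollama, ChatMessage) and the @spaces.GPU() decorator in favor of the ollama Python client: the model is pulled with ~/ollama pull gemma2 once the serve thread is up, the Gradio history is rebuilt as plain role/content dicts, and the reply streams back chunk by chunk. Because the diff shows only fragments of app.py, the following is a minimal self-contained sketch of the resulting pattern, not the Space's actual code: the server-thread setup is inferred from the OLLAMA_SERVICE_THREAD.start() hunk header, the sampling parameters are omitted, and gr.ChatInterface stands in for whatever layout the committed handler's two-value yield "", message implies.

import subprocess
import threading
import time

import gradio as gr
import ollama


def ollama_service_thread():
    # Assumption: the Space ships an ollama binary in $HOME and keeps
    # `ollama serve` running for the life of the process.
    subprocess.run("~/ollama serve", shell=True)


OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread, daemon=True)
OLLAMA_SERVICE_THREAD.start()

print("Giving ollama serve a moment")
time.sleep(10)
subprocess.run("~/ollama pull gemma2", shell=True)


def stream_chat(message: str, history: list):
    # Flatten Gradio's [(user, bot), ...] tuple history into the
    # role/content dicts that ollama.chat() expects.
    conversation = []
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    # stream=True makes ollama.chat() return a generator of chunks,
    # each carrying an incremental piece of the reply.
    response = ollama.chat(model="gemma2", messages=conversation, stream=True)

    partial = ""  # renamed from `message` to avoid shadowing the parameter
    for chunk in response:
        partial += chunk["message"]["content"]
        yield partial  # Gradio re-renders the growing reply on each yield


demo = gr.ChatInterface(fn=stream_chat, chatbot=gr.Chatbot(height=600))
demo.launch()

One design note: the committed handler still accepts temperature, top_p, top_k, penalty, and context_window but no longer forwards them to the model; if they are meant to keep working, ollama.chat() takes them through its options mapping, e.g. options={"temperature": temperature, "num_ctx": context_window}.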