Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ import os
 import threading
 import time
 import subprocess
-import ollama

 OLLAMA = os.path.expanduser("~/ollama")

@@ -11,8 +10,6 @@ if not os.path.exists(OLLAMA):
     subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
     os.chmod(OLLAMA, 0o755)

-
-
 def ollama_service_thread():
     subprocess.run("~/ollama serve", shell=True)

@@ -38,8 +35,9 @@ MODEL_NAME = MODEL_ID.split("/")[-1]

 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

-
-gemma2
+@spaces.GPU()
+def gemma2():
+    return Ollama(model="gemma2:27b", request_timeout=30.0)


 TITLE = "<h1><center>Chatbox</center></h1>"

@@ -81,7 +79,9 @@ def stream_chat(message: str, history: list, temperature: float, context_window:

     print(f"Conversation is -\n{conversation}")

-
+    llm = gemma2()
+
+    resp = llm.stream_chat(
         message = messages,
         chat_history = conversation,
         top_p=top_p,
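For context, the added gemma2() helper appears to wrap the locally served Ollama model with llama_index's Ollama client (the request_timeout argument and stream_chat method match that wrapper). Below is a minimal, self-contained sketch of how such a helper is typically driven; it is an illustration only, not part of this commit, and the prompt, imports, and standalone entry point are assumptions.

# Sketch (assumption): streaming from an Ollama-served Gemma 2 model via llama_index.
# Requires a running `ollama serve` and the llama-index-llms-ollama package.
from llama_index.core.llms import ChatMessage
from llama_index.llms.ollama import Ollama


def gemma2() -> Ollama:
    # Same construction as in the diff: point llama_index at the local Ollama server.
    return Ollama(model="gemma2:27b", request_timeout=30.0)


if __name__ == "__main__":
    llm = gemma2()
    # stream_chat takes a list of ChatMessage objects and yields incremental deltas.
    messages = [ChatMessage(role="user", content="Say hello in one sentence.")]
    for chunk in llm.stream_chat(messages):
        print(chunk.delta, end="", flush=True)
    print()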