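# Scraped Hugging Face file-view header (not part of the program):
#   uploader: vilarin · commit: "Update app.py" (cc5b602, verified)
#   page links: raw / history / blame · scan: No virus · size: 3.9 kB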
# Module setup for the Gemma 2 chat demo: imports, a best-effort model
# download, and the shared configuration read by the rest of the file.
import copy
import os

# Third-party imports are kept in their original relative order.
import ollama
import gradio as gr
import spaces
from llama_index.llms.ollama import Ollama
import llama_index
from llama_index.core.llms import ChatMessage

# Ollama tag of the model that is pulled below and used by the shared LLM.
model = "gemma2:27b"

# Kept only so existing references to the name still resolve. The value is a
# site-packages directory path, not an executable, so it is no longer used to
# launch the download.
ollama_path = "/usr/local/lib/python3.10/site-packages/ollama"

# The previous `os.system(f"{ollama_path} ollama pull {model}")` tried to
# execute that directory path, so the pull could never run and the failure was
# silently discarded. Ask the Ollama server for the model through the Python
# client instead. The download stays best-effort (a failure must not stop the
# app from starting), but the reason is now reported.
try:
    ollama.pull(model)
except Exception as exc:  # startup boundary: report and continue
    print(f"Could not pull Ollama model {model!r}: {exc}")

# Optional Hugging Face access token; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Hugging Face repository id and its short name (text after the last "/").
MODEL_ID = "google/gemma-2-27b-it"
MODEL_NAME = MODEL_ID.split("/")[-1]
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Shared LlamaIndex client for the Ollama-served model.
gemma2 = Ollama(model=model, request_timeout=30.0)
# Page heading markup; shown through gr.HTML(TITLE) in the UI section below.
TITLE = "<h1><center>Chatbox</center></h1>"
# Sub-heading markup; shown through gr.HTML(DESCRIPTION). The f-string fills in
# a link to https://hf.co/<MODEL_ID> labelled with MODEL_NAME.
DESCRIPTION = f"""
<h3>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
<center>
<p>Gemma is the large language model built by Google.
<br>
Feel free to test without log.
</p>
</center>
"""
# Stylesheet passed to gr.Blocks(css=CSS). The .duplicate-button rule matches
# the elem_classes value given to the DuplicateButton; the h3 rule centres
# level-3 headings.
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
"""
@spaces.GPU(duration=90)
def stream_chat(message: str, history: list, temperature: float, context_window: int, top_p: float, top_k: int, penalty: float):
    """Stream the model's reply to ``message`` for the Gradio chat interface.

    Args:
        message: The user's newest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        temperature: Sampling temperature chosen in the UI.
        context_window: Context size (in tokens) chosen in the UI.
        top_p: Nucleus-sampling threshold chosen in the UI.
        top_k: Top-k sampling limit chosen in the UI.
        penalty: Repetition penalty chosen in the UI.

    Yields:
        The reply text accumulated so far, one longer string per streamed
        chunk.
    """
    print(f'message is - {message}')
    print(f'history is - {history}')

    # Rebuild the earlier turns as alternating user / assistant messages.
    conversation = []
    for prompt, answer in history:
        conversation.append(ChatMessage(role="user", content=prompt))
        conversation.append(ChatMessage(role="assistant", content=answer))
    print(f"Conversation is -\n{conversation}")

    # The LLM's stream_chat takes ONE sequence of messages. The earlier call
    # passed `message=` and `chat_history=`, which do not match that signature,
    # so the new user turn is appended to the history here instead.
    messages = [*conversation, ChatMessage(role="user", content=message)]

    # Sampling settings are model options on the LLM object rather than
    # per-call arguments, so build a request-scoped client that copies the
    # shared connection settings and applies this request's slider values
    # (including temperature, which was previously ignored). A fresh object
    # also avoids mutating the shared `gemma2` instance between requests.
    llm = Ollama(
        model=gemma2.model,
        base_url=gemma2.base_url,
        request_timeout=gemma2.request_timeout,
        temperature=temperature,
        context_window=int(context_window),
        additional_kwargs={
            "top_p": top_p,
            "top_k": int(top_k),
            "repeat_penalty": penalty,
        },
    )

    # gr.ChatInterface replaces the displayed reply with each yielded value, so
    # yield the text accumulated so far rather than only the newest fragment.
    reply = ""
    for chunk in llm.stream_chat(messages):
        reply += chunk.delta or ""
        yield reply
# Chat display component, created up front and handed to the ChatInterface.
chatbot = gr.Chatbot(height=600)

# Page layout: heading, description, duplicate button, then the chat interface
# with its collapsible generation settings.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    # Collapsed-by-default container for the extra inputs; created with
    # render=False and passed to the ChatInterface below.
    _params_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)

    # Extra inputs, in the order stream_chat receives them after
    # (message, history): temperature, context_window, top_p, top_k, penalty.
    _generation_controls = [
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature", render=False),
        gr.Slider(minimum=128, maximum=2048, step=1, value=1024, label="Context window", render=False),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.8, label="top_p", render=False),
        gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k", render=False),
        gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Repetition penalty", render=False),
    ]

    # Example prompts offered in the UI; one single-item row per example.
    _example_prompts = [
        ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
        ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
        ["Tell me a random fun fact about the Roman Empire."],
        ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
    ]

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=_params_accordion,
        additional_inputs=_generation_controls,
        examples=_example_prompts,
        cache_examples=False,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()