File size: 614 Bytes
d4cdbf0
20f6917
d4cdbf0
b12a44a
 
 
 
 
 
 
 
 
 
 
 
 
7c324c2
20f6917
a3cd95c
 
0934e21
f40ba5e
a9d7f2c
20f6917
 
 
7a45842
8fdda21
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
from llama_cpp import Llama

# Load the quantized Gemma 2B instruct model, preferring the Hugging Face
# Hub copy and falling back to a local GGUF file.
try:
    # Preferred path: fetch the Q5_K_M quantization from the Hub.
    llm = Llama.from_pretrained(
        repo_id="operablepattern/gemma-2b-it-Q",
        filename="*Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )
except Exception:
    # Fall back to a local copy when the Hub download fails (e.g. offline).
    # NOTE: the original bare `except:` also caught KeyboardInterrupt and
    # SystemExit, making the download impossible to interrupt cleanly;
    # narrowed to Exception.
    llm = Llama(
        model_path="./gemma-2b-it-Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )

def response(message, history):
    """Generate a model reply for gr.ChatInterface.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior turns from gr.ChatInterface; each item is assumed to be a
        (user, assistant) pair (the default tuple format) — confirm
        against the installed gradio version.

    Returns
    -------
    str
        The assistant's reply, stripped of surrounding whitespace.
    """
    # Rebuild the conversation so the model sees prior turns; the
    # original implementation discarded `history` entirely.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Use the chat-completion API so the chat_format="gemma" template
    # configured at load time is actually applied; calling llm(message)
    # directly runs a raw completion with no prompt template.
    output = llm.create_chat_completion(messages, max_tokens=32)
    print(output)
    return output["choices"][0]["message"]["content"].strip()

# Build the chat UI around the response callback, enable request
# queueing, and start the web server (blocks until shutdown).
demo = gr.ChatInterface(fn=response, title="Chat with Gemma")
demo.queue().launch()