Tijmen2 committed on
Commit
be1aa47
1 Parent(s): 0359ec5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
# Load the quantized AstroSage model once at module import so every chat
# request reuses the same in-memory model.
llm = Llama(
    model_path="AstroSage-8B-Q8_0.gguf",
    n_ctx=2048,          # context window (tokens)
    n_threads=4,         # CPU threads for token generation
    seed=42,             # fixed seed -> reproducible sampling
    logits_all=False,    # only need logits for the last token
    use_mmap=True,       # memory-map the model file instead of reading it all
    # BUGFIX: `use_gpu` is not a Llama.__init__ parameter; GPU offload is
    # controlled by `n_gpu_layers` (-1 = offload all layers). `f16_kv` was
    # also removed from recent llama-cpp-python releases, so it is dropped.
    n_gpu_layers=-1,
)
14
+
15
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Chat callback for gr.ChatInterface.

    Args:
        message: The latest user message (str).
        history: List of (user_msg, assistant_msg) tuples from prior turns.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text (str).
    """
    # Rebuild the full OpenAI-style message list from the Gradio history.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # BUGFIX: Llama has no `generate_chat` method; the chat API is
    # `create_chat_completion`, which returns an OpenAI-style dict.
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # BUGFIX: ChatInterface expects a string, so extract the message content
    # from the completion dict instead of returning the raw dict.
    return response["choices"][0]["message"]["content"]
32
+
# Extra controls shown under the chat box; their values are passed to
# `respond` after (message, history), in this order.
_extra_controls = [
    gr.Textbox(
        value="Assume the role of AstroSage, a helpful chatbot designed to answer user queries about astronomy, astrophysics, and cosmology.",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

# Standard chat UI wired to the `respond` callback.
demo = gr.ChatInterface(respond, additional_inputs=_extra_controls)
42
+
43
+ if __name__ == "__main__":
44
+ demo.launch()