geoffhorowitz committed on
Commit
1b4620e
β€’
1 Parent(s): 680dd3b

Create app.py

Files changed (1)
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
+
+import gradio as gr
+from ctransformers import AutoModelForCausalLM
+
+# choose your champion
+#model_id = "TheBloke/Llama-2-7B-GGML"
+model_id = "TheBloke/Llama-2-7B-chat-GGML"
+#model_id = "TheBloke/Llama-2-13B-GGML"
+#model_id = "TheBloke/Llama-2-13B-chat-GGML"
+
+# set GPU offload and generation parameters
+gpu_layers = 130 if '13B' in model_id else 110
+config = {'max_new_tokens': 256, 'repetition_penalty': 1.1, 'temperature': 0.1, 'stream': True}
+
+# get llm instance
+llm = AutoModelForCausalLM.from_pretrained(model_id,
+                                           model_type="llama",
+                                           #lib='avx2',  # for CPU-only use
+                                           gpu_layers=gpu_layers,  # 110 for 7B, 130 for 13B
+                                           **config
+                                           )
+
+def predict(prompt):
+    # a system prompt could be prepended to the user prompt here
+    #system_prompt = """
+    #"""
+
+    # send the prompt through the model and return the full completion
+    res = llm(prompt, stream=False)
+    return res
+
+demo = gr.Interface(
+    fn=predict,
+    inputs='text',
+    outputs='text',
+)
+
+demo.launch()
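
Note that the config above sets stream: True, but predict calls the model with stream=False, so the app always returns the full completion at once. A minimal sketch of a streaming variant is below, reusing the llm and gr objects defined in app.py; it assumes ctransformers yields text pieces when called with stream=True and that Gradio accepts generator functions for incremental output. The predict_stream name is hypothetical and not part of this commit.

# hypothetical streaming variant (not part of this commit)
def predict_stream(prompt):
    text = ""
    for token in llm(prompt, stream=True):  # assumed: ctransformers yields text pieces one at a time
        text += token
        yield text  # Gradio re-renders the output box with the text generated so far

demo_stream = gr.Interface(fn=predict_stream, inputs='text', outputs='text')
demo_stream.launch()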