erfanzar committed on
Commit
fa1cb61
1 Parent(s): 674476e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cLLM.gradio import GradioUserInference
2
+ from cLLM.interactors import OpenChatInteract
3
+ from cLLM import LlamaCPParams, InferenceSession, LlamaCPPGenerationConfig
4
+ from huggingface_hub import hf_hub_download
5
+
6
+
7
def launch(
    *,
    repo_id: str = "TheBloke/phi-2-GGUF",
    filename: str = "phi-2.Q4_K_S.gguf",
    num_threads: int = 8,
    num_batch: int = 512,
    num_context: int = 2048,
) -> None:
    """Fetch a GGUF model file and serve it through the cLLM Gradio chat UI.

    Calling ``launch()`` with no arguments uses the same model file and
    settings that were previously hard-coded in this function.

    Args:
        repo_id: Hugging Face Hub repository passed to ``hf_hub_download``.
        filename: File inside ``repo_id`` passed to ``hf_hub_download``.
        num_threads: Forwarded unchanged to ``LlamaCPParams(num_threads=...)``.
        num_batch: Forwarded unchanged to ``LlamaCPParams(num_batch=...)``.
        num_context: Forwarded unchanged to ``LlamaCPParams(num_context=...)``.
    """
    interact = OpenChatInteract(
        user_name="User",
        assistant_name="cLLM-GPT",
    )

    # Resolve the model file first so the parameter object below only ever
    # receives a concrete path (the return value of ``hf_hub_download``).
    model_path = hf_hub_download(repo_id, filename)

    params = LlamaCPParams(
        model_path=model_path,
        num_threads=num_threads,
        verbose=False,
        num_batch=num_batch,
        num_context=num_context,
        offload_kqv=True,
    )

    # The stop strings come from the interactor so that generation and the
    # chat prompt format are configured from the same object.
    generation_config = LlamaCPPGenerationConfig(
        stream=True,
        stop=interact.get_stop_signs(),
    )
    inference = InferenceSession.create(
        llama_params=params,
        generation_config=generation_config,
    )

    interface = GradioUserInference(
        interactor=interact,
        inference_session=inference,
        llama_param=params,
        use_prefix_for_interactor=True,
    )

    interface.build_chat_interface().launch()
41
+
42
+
43
# Start the chat UI only when this file is run as a script, so importing the
# module does not call launch() as a side effect.
if __name__ == "__main__":
    launch()