Abhaykoul committed on
Commit 4f08543
1 Parent(s): a872d3e

Create app.py

Files changed (1): app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
import spaces
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
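
# Download the Q4_K_M GGUF weights for HelpingAI-6B into ./models at startup.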
hf_hub_download(
    repo_id="OEvortex/HelpingAI-6B",
    filename="helpingai-6b-q4_k_m.gguf",
    local_dir="./models"
)
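
# Module-level cache: keep the model loaded between requests; it is reloaded
# only when the selected model file changes.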
llm = None
llm_model = None
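

# @spaces.GPU() asks Spaces for a GPU for the duration of each call to respond().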
@spaces.GPU()
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    # Format prompts with the Llama 3 chat template.
    chat_template = MessagesFormatterType.LLAMA_3

    global llm
    global llm_model
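
    # Lazily load the GGUF model; reload only if a different file was selected.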
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,  # more layers than the 6B model has, so all are offloaded
            n_batch=1024,
            n_ctx=4096,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )
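
    # Copy the UI-controlled sampling parameters into the provider settings
    # and enable streaming.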
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
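
    # Rebuild the chat history in llama-cpp-agent's format from Gradio's
    # (user, assistant) history tuples.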
    messages = BasicChatHistory()
    for msg in history:
        messages.add_message({'role': Roles.user, 'content': msg[0]})
        messages.add_message({'role': Roles.assistant, 'content': msg[1]})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
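
    # Yield the accumulated text so the ChatInterface renders the reply incrementally.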
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


description = "🌟 HelpingAI: Emotionally Intelligent Conversational AI 🚀"
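
# Build the Gradio chat UI; the dropdown and sliders feed respond()'s extra arguments.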
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            ['helpingai-6b-q4_k_m.gguf'],
            value="helpingai-6b-q4_k_m.gguf",
            label="Model"
        ),
        gr.Textbox(
            value="You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style.",
            label="System message"
        ),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    title="Chat with HelpingAI using llama.cpp",
    description=description,
    chatbot=gr.Chatbot(
        scale=1,
        likeable=False,
        show_copy_button=True
    )
)

if __name__ == "__main__":
    demo.launch()
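
# To try this outside Spaces (a sketch; the package list is inferred from the
# imports above, not pinned by this commit):
#   pip install spaces gradio huggingface_hub llama-cpp-python llama-cpp-agent
#   python app.py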