Sergidev commited on
Commit
f1eb360
·
verified ·
1 Parent(s): 02d4e09
Files changed (6) hide show
  1. README.md +35 -11
  2. app.py +42 -0
  3. hippocampus.py +16 -0
  4. pmbl.py +125 -0
  5. requirements.txt +37 -0
  6. templates/index.html +215 -0
README.md CHANGED
@@ -1,11 +1,35 @@
1
- ---
2
- title: PMB
3
- emoji: 🔥
4
- colorFrom: gray
5
- colorTo: green
6
- sdk: static
7
- pinned: false
8
- license: other
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Persistent Memory Bot
2
+ A chatbot that can remember all previous conversations.
3
+ Useful for any application that requires an LM studio chatbot and functions identically to a traditional python call of a local AI Application.
4
+ ## TO INSTALL:
5
+ ```
6
+ pip install fastapi uvicorn jinja2
7
+ pip3 install huggingface-hub
8
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
9
+
10
+ ```
11
+ ## Full docs:
12
+ ```
13
+ # Base llama-cpp-python with no GPU acceleration
14
+ pip install llama-cpp-python
15
+ # With NVidia CUDA acceleration
16
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
17
+ # Or with OpenBLAS acceleration
18
+ CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
19
+ # Or with CLBLast acceleration
20
+ CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
21
+ # Or with AMD ROCm GPU acceleration (Linux only)
22
+ CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
23
+ # Or with Metal GPU acceleration for macOS systems only
24
+ CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
25
+
26
+ # On Windows, to set the CMAKE_ARGS variable in PowerShell, follow this format; e.g. for NVidia CUDA:
27
+ $env:CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
28
+ pip install llama-cpp-python
29
+
30
+ huggingface-cli download TheBloke/Silicon-Maid-7B-GGUF silicon-maid-7b.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
31
+
32
+ huggingface-cli download lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF Meta-Llama-3-8B-Instruct-Q8_0.gguf --local-dir . --local-dir-use-symlinks False
33
+
34
+
35
+ ```
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.responses import HTMLResponse, StreamingResponse
3
+ from fastapi.templating import Jinja2Templates
4
+ from pmbl import PMBL
5
+
6
+ app = FastAPI()
7
+ pmbl = PMBL("./loyal-macaroni-maid-7b.Q6_K.gguf") # Replace with the path to your model
8
+
9
+ templates = Jinja2Templates(directory="templates")
10
+
11
@app.post("/chat")
async def chat(request: Request):
    """Stream the model's reply to one user message.

    The request body must be a JSON object with two keys:

    * ``user_input`` -- the user's message.
    * ``mode`` -- ``"full"`` to give the model the whole stored history; any
      other value is handled by ``PMBL`` as "smart" mode (single most relevant
      past exchange).

    Returns a ``text/plain`` streaming response on success. A malformed or
    incomplete body yields HTTP 400 and any other failure HTTP 500, both with
    a JSON body of the form ``{"error": "<message>"}``.
    """
    # Local import: the handler needs JSONResponse to attach a status code to
    # error bodies, and this keeps the block independent of the import list.
    from fastapi.responses import JSONResponse

    # Request parsing is kept in its own try so client mistakes (invalid JSON,
    # missing keys, non-object body) are reported as 400 rather than 500.
    try:
        data = await request.json()
        user_input = data["user_input"]
        mode = data["mode"]
    except (KeyError, TypeError, ValueError) as e:
        print(f"[SYSTEM] Error: {str(e)}")
        return JSONResponse(status_code=400, content={"error": str(e)})

    try:
        history = pmbl.get_chat_history(mode, user_input)
        # generate_response is a generator: its body only runs once the
        # response starts streaming, so errors raised during generation cannot
        # be caught here.
        response_generator = pmbl.generate_response(user_input, history, mode)
        return StreamingResponse(response_generator, media_type="text/plain")
    except Exception as e:
        # Top-level boundary: log and report instead of crashing the worker.
        print(f"[SYSTEM] Error: {str(e)}")
        return JSONResponse(status_code=500, content={"error": str(e)})
23
+
24
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """Serve the chat page by rendering the ``index.html`` template."""
    # The template context carries the current request object.
    context = {"request": request}
    page = templates.TemplateResponse("index.html", context)
    return page
27
+
28
@app.post("/sleep")
async def sleep():
    """Run the bot's sleep mode, which assigns topics to untitled chats.

    Returns ``{"message": ...}`` on success. On failure returns HTTP 500 with
    ``{"error": "<message>"}`` so clients can tell errors from success by
    status code.
    """
    # Local import: needed only to attach a status code to the error body.
    from fastapi.responses import JSONResponse

    try:
        # NOTE(review): this call is synchronous inside an async handler, so
        # the event loop is busy until it returns.
        pmbl.sleep_mode()
    except Exception as e:
        # Top-level boundary: log and report instead of crashing the worker.
        print(f"[SYSTEM] Error: {str(e)}")
        return JSONResponse(status_code=500, content={"error": str(e)})

    return {"message": "Sleep mode completed successfully"}
36
+
37
if __name__ == "__main__":
    import uvicorn

    # uvicorn.run() manages its own event loop, blocks until the server shuts
    # down and returns None. Passing that result to
    # loop.run_until_complete() raised a TypeError on shutdown, so the server
    # is started directly instead.
    uvicorn.run(app, host="0.0.0.0", port=1771)
hippocampus.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+
3
def generate_topic(prompt, response, model_path="./loyal-macaroni-maid-7b.Q6_K.gguf"):
    """Ask the local model for a short topic label for one exchange.

    Args:
        prompt: The user's message.
        response: The assistant's reply to that message.
        model_path: Path to the GGUF model file to load. Defaults to the
            path this function previously hard-coded.

    Returns:
        The generated topic text with surrounding whitespace removed.
    """
    # The model is loaded on every call.
    # NOTE(review): with n_ctx=690 a long prompt/response pair may not fit in
    # the context window -- confirm how llama_cpp reports that case.
    llm = Llama(model_path=model_path, n_ctx=690, n_threads=8, n_gpu_layers=32)

    system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-6 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"

    topic = llm(
        system_prompt,
        max_tokens=10,
        temperature=0.7,
        # Stop at a real newline so the topic stays on one line. The previous
        # value "\\n" was a literal backslash followed by "n", which is not a
        # line break and so never ended generation at the end of the line.
        stop=["\n"],
        echo=False
    )

    return topic['choices'][0]['text'].strip()
pmbl.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ from datetime import datetime
3
+ from llama_cpp import Llama
4
+ from hippocampus import generate_topic
5
+
6
class PMBL:
    """Persistent Memory Bot: a llama.cpp model plus a SQLite log of chats.

    Each completed exchange is stored in a ``chats`` table (timestamp, prompt,
    response, topic). The stored exchanges are replayed to the model as
    context, either all of them ("full" mode) or only the single most similar
    one ("smart" mode).
    """

    def __init__(self, model_path, db_path='chat_history.db'):
        """Load the model and make sure the chat table exists.

        Args:
            model_path: Path to the GGUF model file.
            db_path: Path of the SQLite database file. Defaults to the
                previously hard-coded ``chat_history.db``.
        """
        self.db_path = db_path
        self.llm = Llama(model_path=model_path, n_ctx=13000, n_threads=8, n_gpu_layers=32)
        self.init_db()

    def _connect(self):
        """Open a new connection to the chat database."""
        # getattr keeps instances created without __init__ on the default file.
        return sqlite3.connect(getattr(self, 'db_path', 'chat_history.db'))

    def init_db(self):
        """Create the ``chats`` table if it does not exist yet."""
        conn = self._connect()
        try:
            c = conn.cursor()
            c.execute('''CREATE TABLE IF NOT EXISTS chats
                         (id INTEGER PRIMARY KEY AUTOINCREMENT,
                         timestamp TEXT,
                         prompt TEXT,
                         response TEXT,
                         topic TEXT)''')
            conn.commit()
        finally:
            # Close even if the statement fails so the connection cannot leak.
            conn.close()

    @staticmethod
    def _as_messages(timestamp, prompt, response):
        """Turn one stored row into a user message and a PMB message."""
        return [
            {"role": "user", "content": prompt},
            {"role": "PMB", "content": f"[{timestamp}] {response}"},
        ]

    def get_chat_history(self, mode="full", user_message=""):
        """Return stored exchanges as a list of role/content dicts.

        Args:
            mode: ``"full"`` returns every stored exchange in insertion
                order. Any other value is treated as "smart" mode, which
                returns only the exchange most similar to ``user_message``
                among chats whose topic is no longer ``'Untitled'``.
            user_message: The incoming message, used only in smart mode.

        Returns:
            A list of ``{"role": ..., "content": ...}`` dicts; empty when
            nothing is stored or nothing relevant is found.
        """
        conn = self._connect()
        try:
            c = conn.cursor()
            history = []

            if mode == "full":
                # NOTE(review): the full history grows without bound while the
                # model context is fixed at n_ctx=13000 -- confirm the
                # behaviour once the prompt no longer fits.
                c.execute("SELECT timestamp, prompt, response FROM chats ORDER BY id")
                for timestamp, prompt, response in c.fetchall():
                    history.extend(self._as_messages(timestamp, prompt, response))
            else:  # mode == "smart"
                c.execute("SELECT id, prompt, response FROM chats WHERE topic != 'Untitled'")
                relevant_chat_id = self.find_relevant_chat(c.fetchall(), user_message)

                if relevant_chat_id is not None:
                    c.execute("SELECT timestamp, prompt, response FROM chats WHERE id = ?", (relevant_chat_id,))
                    row = c.fetchone()
                    # The row can be missing if it was deleted between the
                    # two queries; treat that as "no relevant history".
                    if row is not None:
                        history = self._as_messages(row[0], row[1], row[2])

            return history
        finally:
            conn.close()

    def find_relevant_chat(self, chats, user_message):
        """Return the id of the chat that scores highest against a message.

        Args:
            chats: Iterable of ``(id, prompt, response)`` tuples.
            user_message: The text to compare each chat against.

        Returns:
            The id of the best-scoring chat, or ``None`` when no chat scores
            above zero. Ties keep the earliest chat.
        """
        max_score = 0
        relevant_chat_id = None

        for chat_id, prompt, response in chats:
            score = self.calculate_similarity_score(prompt + " " + response, user_message)

            if score > max_score:
                max_score = score
                relevant_chat_id = chat_id

        return relevant_chat_id

    def calculate_similarity_score(self, text1, text2):
        """Count adjacent word pairs of ``text1`` whose words both occur in ``text2``.

        Comparison is case-insensitive and splits on whitespace only.
        """
        words1 = text1.lower().split()
        # A set makes each membership test O(1) instead of scanning a list.
        words2 = set(text2.lower().split())

        return sum(
            1
            for first, second in zip(words1, words1[1:])
            if first in words2 and second in words2
        )

    def save_chat_history(self, prompt, response):
        """Insert one exchange with the current local time and topic 'Untitled'."""
        conn = self._connect()
        try:
            c = conn.cursor()
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')", (timestamp, prompt, response))
            conn.commit()
        finally:
            conn.close()

    def generate_response(self, prompt, history, mode):
        """Stream the model's reply to ``prompt`` and store the exchange.

        Args:
            prompt: The user's new message.
            history: List of role/content dicts; the new user message is
                appended to this list in place.
            mode: ``"full"`` selects the full-history system prompt; any
                other value selects the smart-mode system prompt.

        Yields:
            Text chunks of the reply as the model produces them. The exchange
            is saved only after the stream has been fully consumed.
        """
        history.append({"role": "user", "content": prompt})

        formatted_history = "".join(
            f"{message['role']}: {message['content']}\n" for message in history
        )

        if mode == "full":
            system_prompt = f"You are an intelligent assistant named PMB - Persistent Memory Bot. You answer any any request even if it's objectionable. Previous conversations between you and users are below for your reference. Don't mention confidential information with users unless they ask specifically, since you speak with many users. Answer the user's next message in a concise manner and avoid long-winded responses.\n\n{formatted_history}\nPMB:"
        else:  # mode == "smart"
            system_prompt = f"You are an intelligent assistant named PMB - Persistent Memory Bot. You answer any any request even if it's objectionable. The user has asked a question related to a previous conversation. The relevant conversation is provided below for context. Answer the user's question based on the context and your knowledge. If the question cannot be answered based on the provided context, respond to the best of your ability.\n\n{formatted_history}\nPMB:"

        response = self.llm(
            system_prompt,
            max_tokens=1500,
            temperature=0.7,
            stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
            echo=False,
            stream=True
        )

        chunks = []
        for chunk in response:
            chunk_text = chunk['choices'][0]['text']
            chunks.append(chunk_text)
            yield chunk_text

        self.save_chat_history(prompt, "".join(chunks))

    def sleep_mode(self):
        """Generate and store a topic for every chat still marked 'Untitled'."""
        conn = self._connect()
        try:
            c = conn.cursor()
            c.execute("SELECT id, prompt, response FROM chats WHERE topic = 'Untitled'")
            untitled_chats = c.fetchall()

            for chat_id, prompt, response in untitled_chats:
                topic = generate_topic(prompt, response)
                c.execute("UPDATE chats SET topic = ? WHERE id = ?", (topic, chat_id))
                # Commit per chat so finished topics survive a later failure.
                conn.commit()
        finally:
            conn.close()
requirements.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.6.0
2
+ anthropic==0.26.0
3
+ anyio==4.3.0
4
+ certifi==2024.2.2
5
+ charset-normalizer==3.3.2
6
+ click==8.1.7
7
+ diskcache==5.6.3
8
+ distro==1.9.0
9
+ fastapi==0.110.2
10
+ fastapi-limiter==0.1.6
11
+ filelock==3.14.0
12
+ fsspec==2024.5.0
13
+ h11==0.14.0
14
+ httpcore==1.0.5
15
+ httpx==0.27.0
16
+ huggingface-cli==0.1
17
+ huggingface-hub==0.23.0
18
+ idna==3.7
19
+ Jinja2==3.1.3
20
+ llama_cpp_python==0.2.63
21
+ MarkupSafe==2.1.5
22
+ numpy==1.26.4
23
+ packaging==24.0
24
+ pydantic==2.7.0
25
+ pydantic_core==2.18.1
26
+ PyYAML==6.0.1
27
+ redis==5.1.0b5
28
+ requests==2.32.0
29
+ sniffio==1.3.1
30
+ SQLAlchemy==2.0.30
31
+ starlette==0.37.2
32
+ tinydb==4.8.0
33
+ tokenizers==0.19.1
34
+ tqdm==4.66.4
35
+ typing_extensions==4.11.0
36
+ urllib3==2.2.1
37
+ uvicorn==0.29.0
templates/index.html ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Persistent Memory Bot</title>
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
7
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
8
+ <style>
9
+ body {
10
+ font-family: Arial, sans-serif;
11
+ margin: 0;
12
+ padding: 20px;
13
+ background: linear-gradient(to bottom right, #222222, #333333);
14
+ height: calc(100vh - 40px);
15
+ display: flex;
16
+ flex-direction: column;
17
+ }
18
+
19
+ h1 {
20
+ text-align: center;
21
+ margin-bottom: 20px;
22
+ color: #f0f8ff;
23
+ text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
24
+ }
25
+
26
+ #chat-container {
27
+ border: 1px solid #ccc;
28
+ border-radius: 5px;
29
+ padding: 10px;
30
+ margin-bottom: 20px;
31
+ flex: 1;
32
+ overflow-y: scroll;
33
+ background-color: #1e1e1e;
34
+ color: #f0f8ff;
35
+ text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
36
+ }
37
+
38
+ .message {
39
+ margin: 5px 0;
40
+ padding: 8px;
41
+ border-radius: 5px;
42
+ max-width: 80%;
43
+ white-space: pre-wrap;
44
+ }
45
+
46
+ .user-message {
47
+ background-color: #59788E;
48
+ color: white;
49
+ align-self: flex-end;
50
+ margin-left: auto;
51
+ margin-right: 10px;
52
+ }
53
+
54
+ .bot-message {
55
+ background-color: #2c3e4c;
56
+ color: white;
57
+ align-self: flex-start;
58
+ margin-right: auto;
59
+ }
60
+
61
+ #chat-form {
62
+ display: flex;
63
+ margin-top: auto;
64
+ margin-bottom: 20px;
65
+ }
66
+
67
+ #user-input {
68
+ flex-grow: 1;
69
+ padding: 10px;
70
+ font-size: 16px;
71
+ border: none;
72
+ border-radius: 5px;
73
+ }
74
+
75
+ button {
76
+ padding: 10px;
77
+ font-size: 16px;
78
+ background-color: #59788E;
79
+ color: white;
80
+ border: none;
81
+ border-radius: 5px;
82
+ cursor: pointer;
83
+ margin-left: 10px;
84
+ }
85
+
86
+ button:hover {
87
+ background-color: #45a049;
88
+ }
89
+
90
+ .icon {
91
+ margin-right: 5px;
92
+ }
93
+
94
+ #loading-message {
95
+ margin-top: 10px;
96
+ color: #00ff00;
97
+ font-style: italic;
98
+ }
99
+
100
+ .switch {
101
+ position: relative;
102
+ display: inline-block;
103
+ width: 60px;
104
+ height: 34px;
105
+ margin-bottom: 10px;
106
+ }
107
+
108
+ .switch input {
109
+ opacity: 0;
110
+ width: 0;
111
+ height: 0;
112
+ }
113
+
114
+ .slider {
115
+ position: absolute;
116
+ cursor: pointer;
117
+ top: 0;
118
+ left: 0;
119
+ right: 0;
120
+ bottom: 0;
121
+ background-color: #ccc;
122
+ transition: .4s;
123
+ border-radius: 34px;
124
+ }
125
+
126
+ .slider:before {
127
+ position: absolute;
128
+ content: "";
129
+ height: 26px;
130
+ width: 26px;
131
+ left: 4px;
132
+ bottom: 4px;
133
+ background-color: white;
134
+ transition: .4s;
135
+ border-radius: 50%;
136
+ }
137
+
138
+ input:checked + .slider {
139
+ background-color: #59788E;
140
+ }
141
+
142
+ input:checked + .slider:before {
143
+ transform: translateX(26px);
144
+ }
145
+
146
+ .mode-label {
147
+ margin-left: 10px;
148
+ color: #f0f8ff;
149
+ }
150
+ </style>
151
+ <script>
152
$(document).ready(function() {
    var memoryMode = 'full';
    var sleepInFlight = false;

    // Build a chat bubble. The message body is inserted with .text(), never
    // as HTML, so markup typed by the user or produced by the model is shown
    // literally instead of being interpreted by the browser.
    function buildMessage(cssClass, iconClass, text) {
        var $message = $('<div>').addClass('message ' + cssClass);
        $message.append($('<i>').addClass('fas ' + iconClass + ' icon'));
        $message.append($('<span>').addClass('message-text').text(text));
        return $message;
    }

    // Re-enable the form once a request has finished, successfully or not.
    function finishRequest() {
        $('#send-button').prop('disabled', false);
        $('#loading-message').hide();
    }

    $('#memory-toggle').change(function() {
        memoryMode = $(this).is(':checked') ? 'smart' : 'full';
    });

    $('#chat-form').submit(function(event) {
        event.preventDefault();
        var userInput = $('#user-input').val();
        if (!$.trim(userInput)) {
            return; // nothing to send
        }

        var $chat = $('#chat-container');
        $chat.append(buildMessage('user-message', 'fa-user', userInput));
        $('#user-input').val('');
        $('#send-button').prop('disabled', true);
        $('#loading-message').show();

        var $botMessage = buildMessage('bot-message', 'fa-robot', '');
        var $botText = $botMessage.find('.message-text');
        $chat.append($botMessage);

        $.ajax({
            url: '/chat',
            method: 'POST',
            data: JSON.stringify({ user_input: userInput, mode: memoryMode }),
            contentType: 'application/json',
            dataType: 'text', // the reply is streamed as plain text
            xhrFields: {
                onprogress: function(e) {
                    // The XHR response holds everything received so far.
                    // .message uses white-space: pre-wrap, so newlines render
                    // without converting them to <br>.
                    $botText.text(e.currentTarget.response);
                    $chat.scrollTop($chat[0].scrollHeight);
                }
            },
            success: function(data) {
                // Make sure the final text is shown even if no progress
                // event fired for the last chunk.
                $botText.text(data);
                $chat.scrollTop($chat[0].scrollHeight);
                finishRequest();
            },
            error: function(xhr, status, error) {
                finishRequest();
                $chat.append(buildMessage('error-message', 'fa-exclamation-triangle', 'Error: ' + error));
            }
        });
    });

    // Ask the server to run sleep mode every 20 seconds (20000 ms). A new
    // request is skipped while the previous one is still running so slow
    // runs do not pile up.
    setInterval(function() {
        if (sleepInFlight) {
            return;
        }
        sleepInFlight = true;
        $.post('/sleep').always(function() {
            sleepInFlight = false;
        });
    }, 20000);
});
200
+ </script>
201
+ </head>
202
+ <body>
203
+ <h1>Persistent Memory Bot</h1>
204
+ <div id="chat-container"></div>
205
+ <form id="chat-form">
206
+ <label class="switch">
207
+ <input type="checkbox" id="memory-toggle">
208
+ <span class="slider"></span>
209
+ </label>
210
+ <input type="text" id="user-input" name="user_input" placeholder="Enter your message, use the switch to toggle smart mode for faster responses but less memory. Cannot provide financial/legal advice.">
211
+ <button type="submit" id="send-button"><i class="fas fa-paper-plane"></i> Send</button>
212
+ </form>
213
+ <div id="loading-message" style="display: none;">Prompt received. Generating response...</div>
214
+ </body>
215
+ </html>