Spaces:
Running
Running
oceansweep
committed on
Commit
•
c8ebc55
1
Parent(s):
f71d2e6
Upload 11 files
Browse files- App_Function_Libraries/Books/Book_Ingestion_Lib.py +394 -60
- App_Function_Libraries/Character_Chat/Character_Chat_Lib.py +541 -0
- App_Function_Libraries/Character_Chat/__init__.py +0 -0
- App_Function_Libraries/DB/Character_Chat_DB.py +701 -684
- App_Function_Libraries/DB/DB_Manager.py +4 -1
- App_Function_Libraries/DB/SQLite_DB.py +23 -9
- App_Function_Libraries/Utils/Utils.py +44 -5
App_Function_Libraries/Books/Book_Ingestion_Lib.py
CHANGED
@@ -14,35 +14,290 @@
|
|
14 |
# Import necessary libraries
|
15 |
import os
|
16 |
import re
|
|
|
|
|
17 |
from datetime import datetime
|
18 |
import logging
|
19 |
|
20 |
import ebooklib
|
21 |
from bs4 import BeautifulSoup
|
22 |
from ebooklib import epub
|
|
|
|
|
23 |
#
|
24 |
# Import Local
|
25 |
-
from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords
|
|
|
|
|
|
|
26 |
#
|
27 |
#######################################################################################################################
|
28 |
# Function Definitions
|
29 |
#
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def read_epub(file_path):
|
34 |
-
"""
|
35 |
-
|
36 |
-
chapters = []
|
37 |
-
for item in book.get_items():
|
38 |
-
if item.get_type() == ebooklib.ITEM_DOCUMENT:
|
39 |
-
chapters.append(item.get_content())
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
# Ingest a text file into the database with Title/Author/Keywords
|
@@ -57,15 +312,28 @@ def extract_epub_metadata(content):
|
|
57 |
|
58 |
|
59 |
def ingest_text_file(file_path, title=None, author=None, keywords=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
try:
|
61 |
with open(file_path, 'r', encoding='utf-8') as file:
|
62 |
content = file.read()
|
63 |
|
64 |
# Check if it's a converted epub and extract metadata if so
|
65 |
-
if 'epub_converted' in (keywords or ''):
|
66 |
extracted_title, extracted_author = extract_epub_metadata(content)
|
67 |
title = title or extracted_title
|
68 |
author = author or extracted_author
|
|
|
69 |
|
70 |
# If title is still not provided, use the filename without extension
|
71 |
if not title:
|
@@ -95,6 +363,7 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
|
|
95 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
96 |
)
|
97 |
|
|
|
98 |
return f"Text file '{title}' by {author} ingested successfully."
|
99 |
except Exception as e:
|
100 |
logging.error(f"Error ingesting text file: {str(e)}")
|
@@ -102,68 +371,133 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
|
|
102 |
|
103 |
|
104 |
def ingest_folder(folder_path, keywords=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
results = []
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
|
113 |
def epub_to_markdown(epub_path):
|
114 |
-
|
115 |
-
|
116 |
-
chapters = []
|
117 |
-
|
118 |
-
# Extract and format the table of contents
|
119 |
-
toc = book.toc
|
120 |
-
for item in toc:
|
121 |
-
if isinstance(item, tuple):
|
122 |
-
section, children = item
|
123 |
-
level = 1
|
124 |
-
markdown_content += format_toc_item(section, level)
|
125 |
-
for child in children:
|
126 |
-
markdown_content += format_toc_item(child, level + 1)
|
127 |
-
else:
|
128 |
-
markdown_content += format_toc_item(item, 1)
|
129 |
|
130 |
-
|
|
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
chapter_title = title.get_text()
|
142 |
-
markdown_content += f"# {chapter_title}\n\n"
|
143 |
|
144 |
-
# Process chapter content
|
145 |
-
for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
|
146 |
-
if elem.name.startswith('h'):
|
147 |
-
level = int(elem.name[1])
|
148 |
-
markdown_content += f"{'#' * level} {elem.get_text()}\n\n"
|
149 |
-
elif elem.name == 'p':
|
150 |
-
markdown_content += f"{elem.get_text()}\n\n"
|
151 |
-
elif elem.name in ['ul', 'ol']:
|
152 |
-
for li in elem.find_all('li'):
|
153 |
-
markdown_content += f"- {li.get_text()}\n"
|
154 |
-
markdown_content += "\n"
|
155 |
|
156 |
-
|
|
|
|
|
157 |
|
158 |
-
|
|
|
|
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
-
|
162 |
-
|
|
|
|
|
163 |
|
164 |
|
165 |
def slugify(text):
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
#
|
169 |
# End of Function Definitions
|
|
|
14 |
# Import necessary libraries
|
15 |
import os
|
16 |
import re
|
17 |
+
import tempfile
|
18 |
+
import zipfile
|
19 |
from datetime import datetime
|
20 |
import logging
|
21 |
|
22 |
import ebooklib
|
23 |
from bs4 import BeautifulSoup
|
24 |
from ebooklib import epub
|
25 |
+
|
26 |
+
from App_Function_Libraries.Chunk_Lib import chunk_ebook_by_chapters
|
27 |
#
|
28 |
# Import Local
|
29 |
+
from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords, add_media_to_database
|
30 |
+
from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
|
31 |
+
|
32 |
+
|
33 |
#
|
34 |
#######################################################################################################################
|
35 |
# Function Definitions
|
36 |
#
|
37 |
|
38 |
+
def import_epub(file_path, title=None, author=None, keywords=None, custom_prompt=None, system_prompt=None, summary=None,
                auto_summarize=False, api_name=None, api_key=None, chunk_options=None, custom_chapter_pattern=None):
    """
    Imports an EPUB file, extracts its content, chunks it, optionally summarizes it, and adds it to the database.

    Parameters:
        - file_path (str): Path to the EPUB file.
        - title (str, optional): Title of the book. Falls back to extracted metadata, then the file name.
        - author (str, optional): Author of the book. Falls back to extracted metadata, then "Unknown".
        - keywords (str, optional): Comma-separated keywords for the book. Blank entries are ignored.
        - custom_prompt (str, optional): Custom user prompt for summarization.
        - system_prompt (str, optional): System message passed to the summarization call.
        - summary (str, optional): Predefined summary of the book.
        - auto_summarize (bool, optional): Whether to auto-summarize the chunks (requires api_name and api_key).
        - api_name (str, optional): API name for summarization.
        - api_key (str, optional): API key for summarization.
        - chunk_options (dict, optional): Options for chunking. The caller's dict is not modified.
        - custom_chapter_pattern (str, optional): Custom regex pattern for chapter detection.

    Returns:
        - str: Status message indicating success or failure. Errors are reported in the
          returned message rather than raised.
    """
    try:
        logging.info(f"Importing EPUB file from {file_path}")

        # Convert EPUB to Markdown
        markdown_content = epub_to_markdown(file_path)
        logging.debug("Converted EPUB to Markdown.")

        # Extract metadata if not provided
        if not title or not author:
            extracted_title, extracted_author = extract_epub_metadata(markdown_content)
            title = title or extracted_title or os.path.splitext(os.path.basename(file_path))[0]
            author = author or extracted_author or "Unknown"
            logging.debug(f"Extracted metadata - Title: {title}, Author: {author}")

        # Process keywords, dropping blank entries such as those produced by "a,,b" or a trailing comma
        keyword_list = [kw.strip() for kw in keywords.split(',') if kw.strip()] if keywords else []
        logging.debug(f"Keywords: {keyword_list}")

        # Set default chunk options if not provided
        if chunk_options is None:
            chunk_options = {
                'method': 'chapter',
                'max_size': 500,
                'overlap': 200,
                'custom_chapter_pattern': custom_chapter_pattern
            }
        else:
            # Copy first: the same dict may be reused by the caller for several files,
            # so filling in defaults must not leak back into it.
            chunk_options = dict(chunk_options)
            chunk_options.setdefault('method', 'chapter')
            chunk_options.setdefault('custom_chapter_pattern', custom_chapter_pattern)

        # Chunk the content by chapters
        chunks = chunk_ebook_by_chapters(markdown_content, chunk_options)
        logging.info(f"Total chunks created: {len(chunks)}")
        if chunks:
            logging.debug(f"Structure of first chunk: {chunks[0].keys()}")

        # Handle summarization if enabled
        if auto_summarize and api_name and api_key:
            logging.info("Auto-summarization is enabled.")
            chunk_summaries = []
            for chunk in chunks:
                chunk_text = chunk.get('text', '')
                if not chunk_text:
                    continue
                summary_text = perform_summarization(api_name, chunk_text, custom_prompt, api_key,
                                                     recursive_summarization=False, temp=None,
                                                     system_message=system_prompt)
                # A chunk is not guaranteed to carry a 'metadata' dict, so create it on demand.
                chunk.setdefault('metadata', {})['summary'] = summary_text
                if isinstance(summary_text, str) and summary_text.strip():
                    chunk_summaries.append(summary_text.strip())
            logging.info("Summarization of chunks completed.")
            # Only the chunk text is stored as segments below, so without this the generated
            # summaries would be discarded and the database would receive summary=None.
            if not summary:
                summary = "\n\n".join(chunk_summaries) or "No summary provided."
        else:
            # If not summarizing, set a default summary or use provided summary
            if summary:
                logging.debug("Using provided summary.")
            else:
                summary = "No summary provided."

        # Create info_dict
        info_dict = {
            'title': title,
            'uploader': author,
            'ingestion_date': datetime.now().strftime('%Y-%m-%d')
        }

        # Prepare segments for database
        segments = [{'Text': chunk.get('text', chunk.get('content', ''))} for chunk in chunks]
        logging.debug(f"Prepared segments for database. Number of segments: {len(segments)}")

        # Add to database
        result = add_media_to_database(
            url=file_path,
            info_dict=info_dict,
            segments=segments,
            summary=summary,
            keywords=keyword_list,
            custom_prompt_input=custom_prompt,
            whisper_model="Imported",
            media_type="ebook",
            overwrite=False
        )

        logging.info(f"Ebook '{title}' by {author} imported successfully. Database result: {result}")
        return f"Ebook '{title}' by {author} imported successfully. Database result: {result}"

    except Exception as e:
        logging.exception(f"Error importing ebook: {str(e)}")
        return f"Error importing ebook: {str(e)}"
|
146 |
+
|
147 |
+
# FIXME
|
148 |
+
def process_zip_file(zip_file, title, author, keywords, custom_prompt, system_prompt, summary, auto_summarize, api_name, api_key, chunk_options):
    """
    Processes a ZIP file containing multiple EPUB files and imports each one.

    Parameters:
        - zip_file (file-like object or path): The ZIP file to process. Either an object exposing
          `.name` (or `.path`), or a plain filesystem path.
        - title (str): Title passed to every imported book.
        - author (str): Author name for the books.
        - keywords (str): Comma-separated keywords.
        - custom_prompt (str): Custom user prompt for summarization.
        - system_prompt (str): System message for summarization.
        - summary (str): Predefined summary passed through to each import.
        - auto_summarize (bool): Whether to auto-summarize the chunks.
        - api_name (str): API name for summarization.
        - api_key (str): API key for summarization.
        - chunk_options (dict): Options for chunking.

    Returns:
        - str: Combined status messages for all EPUB files in the ZIP (one line per file,
          empty when the archive contains no EPUBs), or an error message.
    """
    results = []
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            if isinstance(zip_file, (str, os.PathLike)):
                zip_path = zip_file
            else:
                zip_path = zip_file.name if hasattr(zip_file, 'name') else zip_file.path
            logging.info(f"Extracting ZIP file {zip_path} to temporary directory {temp_dir}")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Archives frequently wrap their content in a folder, so search the whole tree
            # rather than only the top level.
            epub_paths = []
            for root, dirs, names in os.walk(temp_dir):
                # Skip macOS resource-fork folders; their "._*.epub" entries are not real books.
                dirs[:] = [d for d in dirs if d != '__MACOSX']
                epub_paths.extend(
                    os.path.join(root, name) for name in names if name.lower().endswith('.epub')
                )
            epub_paths.sort()  # deterministic processing and report order

            custom_chapter_pattern = chunk_options.get('custom_chapter_pattern') if chunk_options else None

            for file_path in epub_paths:
                filename = os.path.relpath(file_path, temp_dir)
                logging.info(f"Processing EPUB file {filename} from ZIP.")
                result = import_epub(
                    file_path=file_path,
                    title=title,
                    author=author,
                    keywords=keywords,
                    custom_prompt=custom_prompt,
                    system_prompt=system_prompt,  # previously accepted but never forwarded
                    summary=summary,
                    auto_summarize=auto_summarize,
                    api_name=api_name,
                    api_key=api_key,
                    chunk_options=chunk_options,
                    custom_chapter_pattern=custom_chapter_pattern
                )
                results.append(f"File: {filename} - {result}")
            logging.info("Completed processing all EPUB files in the ZIP.")
    except Exception as e:
        logging.exception(f"Error processing ZIP file: {str(e)}")
        return f"Error processing ZIP file: {str(e)}"

    return "\n".join(results)
|
199 |
+
|
200 |
+
|
201 |
+
def import_file_handler(file, title, author, keywords, system_prompt, custom_prompt, auto_summarize, api_name,
|
202 |
+
api_key, max_chunk_size, chunk_overlap, custom_chapter_pattern):
|
203 |
+
try:
|
204 |
+
# Handle max_chunk_size
|
205 |
+
if isinstance(max_chunk_size, str):
|
206 |
+
max_chunk_size = int(max_chunk_size) if max_chunk_size.strip() else 4000
|
207 |
+
elif not isinstance(max_chunk_size, int):
|
208 |
+
max_chunk_size = 4000 # Default value if not a string or int
|
209 |
+
|
210 |
+
# Handle chunk_overlap
|
211 |
+
if isinstance(chunk_overlap, str):
|
212 |
+
chunk_overlap = int(chunk_overlap) if chunk_overlap.strip() else 0
|
213 |
+
elif not isinstance(chunk_overlap, int):
|
214 |
+
chunk_overlap = 0 # Default value if not a string or int
|
215 |
+
|
216 |
+
chunk_options = {
|
217 |
+
'method': 'chapter',
|
218 |
+
'max_size': max_chunk_size,
|
219 |
+
'overlap': chunk_overlap,
|
220 |
+
'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
|
221 |
+
}
|
222 |
+
|
223 |
+
if file is None:
|
224 |
+
return "No file uploaded."
|
225 |
+
|
226 |
+
file_path = file.name
|
227 |
+
if not os.path.exists(file_path):
|
228 |
+
return "Uploaded file not found."
|
229 |
+
|
230 |
+
if file_path.lower().endswith('.epub'):
|
231 |
+
status = import_epub(
|
232 |
+
file_path,
|
233 |
+
title,
|
234 |
+
author,
|
235 |
+
keywords,
|
236 |
+
custom_prompt=custom_prompt,
|
237 |
+
system_prompt=system_prompt,
|
238 |
+
summary=None,
|
239 |
+
auto_summarize=auto_summarize,
|
240 |
+
api_name=api_name,
|
241 |
+
api_key=api_key,
|
242 |
+
chunk_options=chunk_options,
|
243 |
+
custom_chapter_pattern=custom_chapter_pattern
|
244 |
+
)
|
245 |
+
return f"📚 EPUB Imported Successfully:\n{status}"
|
246 |
+
elif file.name.lower().endswith('.zip'):
|
247 |
+
status = process_zip_file(
|
248 |
+
zip_file=file,
|
249 |
+
title=title,
|
250 |
+
author=author,
|
251 |
+
keywords=keywords,
|
252 |
+
custom_prompt=custom_prompt,
|
253 |
+
system_prompt=system_prompt,
|
254 |
+
summary=None, # Let the library handle summarization
|
255 |
+
auto_summarize=auto_summarize,
|
256 |
+
api_name=api_name,
|
257 |
+
api_key=api_key,
|
258 |
+
chunk_options=chunk_options
|
259 |
+
)
|
260 |
+
return f"📦 ZIP Processed Successfully:\n{status}"
|
261 |
+
elif file.name.lower().endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
|
262 |
+
file_type = file.name.split('.')[-1].upper()
|
263 |
+
return f"{file_type} file import is not yet supported."
|
264 |
+
else:
|
265 |
+
return "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."
|
266 |
|
267 |
+
except ValueError as ve:
|
268 |
+
logging.exception(f"Error parsing input values: {str(ve)}")
|
269 |
+
return f"❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
|
270 |
+
except Exception as e:
|
271 |
+
logging.exception(f"Error during file import: {str(e)}")
|
272 |
+
return f"❌ Error during import: {str(e)}"
|
273 |
|
274 |
def read_epub(file_path):
    """
    Extracts the plain text of every document item in an EPUB file.

    Parameters:
        - file_path (str): Path to the EPUB file.

    Returns:
        - str: Text of all document items, each followed by a blank line.

    Raises:
        - Exception: Any error raised while reading or parsing is logged and re-raised.
    """
    try:
        logging.info(f"Reading EPUB file from {file_path}")
        book = epub.read_epub(file_path)

        # Collect the raw content of document items only (images, styles, etc. are skipped).
        documents = [
            entry.get_content()
            for entry in book.get_items()
            if entry.get_type() == ebooklib.ITEM_DOCUMENT
        ]

        pieces = []
        for raw_html in documents:
            parsed = BeautifulSoup(raw_html, 'html.parser')
            pieces.append(parsed.get_text(separator='\n\n') + "\n\n")
        extracted = "".join(pieces)

        logging.debug("EPUB content extraction completed.")
        return extracted
    except Exception as e:
        logging.exception(f"Error reading EPUB file: {str(e)}")
        raise
|
301 |
|
302 |
|
303 |
# Ingest a text file into the database with Title/Author/Keywords
|
|
|
312 |
|
313 |
|
314 |
def ingest_text_file(file_path, title=None, author=None, keywords=None):
|
315 |
+
"""
|
316 |
+
Ingests a plain text file into the database with optional metadata.
|
317 |
+
|
318 |
+
Parameters:
|
319 |
+
- file_path (str): Path to the text file.
|
320 |
+
- title (str, optional): Title of the document.
|
321 |
+
- author (str, optional): Author of the document.
|
322 |
+
- keywords (str, optional): Comma-separated keywords.
|
323 |
+
|
324 |
+
Returns:
|
325 |
+
- str: Status message indicating success or failure.
|
326 |
+
"""
|
327 |
try:
|
328 |
with open(file_path, 'r', encoding='utf-8') as file:
|
329 |
content = file.read()
|
330 |
|
331 |
# Check if it's a converted epub and extract metadata if so
|
332 |
+
if 'epub_converted' in (keywords or '').lower():
|
333 |
extracted_title, extracted_author = extract_epub_metadata(content)
|
334 |
title = title or extracted_title
|
335 |
author = author or extracted_author
|
336 |
+
logging.debug(f"Extracted metadata for converted EPUB - Title: {title}, Author: {author}")
|
337 |
|
338 |
# If title is still not provided, use the filename without extension
|
339 |
if not title:
|
|
|
363 |
ingestion_date=datetime.now().strftime('%Y-%m-%d')
|
364 |
)
|
365 |
|
366 |
+
logging.info(f"Text file '{title}' by {author} ingested successfully.")
|
367 |
return f"Text file '{title}' by {author} ingested successfully."
|
368 |
except Exception as e:
|
369 |
logging.error(f"Error ingesting text file: {str(e)}")
|
|
|
371 |
|
372 |
|
373 |
def ingest_folder(folder_path, keywords=None):
    """
    Runs ingest_text_file on every `.txt` file directly inside a folder.

    Parameters:
        - folder_path (str): Path to the folder containing text files.
        - keywords (str, optional): Comma-separated keywords passed to each file's ingestion.

    Returns:
        - str: The per-file status messages joined by newlines, or an error message
          if the folder could not be processed.
    """
    results = []
    try:
        logging.info(f"Ingesting all text files from folder {folder_path}")
        # Extension match is case-insensitive; subfolders are not descended into.
        text_file_names = (
            entry for entry in os.listdir(folder_path) if entry.lower().endswith('.txt')
        )
        for entry in text_file_names:
            full_path = os.path.join(folder_path, entry)
            results.append(ingest_text_file(full_path, keywords=keywords))
        logging.info("Completed ingestion of all text files in the folder.")
    except Exception as e:
        logging.exception(f"Error ingesting folder: {str(e)}")
        return f"Error ingesting folder: {str(e)}"

    return "\n".join(results)
|
398 |
|
399 |
|
400 |
def _toc_to_markdown(toc_items, level=1):
    """Render a (possibly nested) EPUB table of contents as a list of Markdown list lines."""
    lines = []
    for item in toc_items:
        if isinstance(item, tuple):
            # A tuple is (section, children); children may themselves contain nested tuples.
            section, children = item
            lines.append(format_toc_item(section, level))
            lines.extend(_toc_to_markdown(children, level + 1))
        else:
            lines.append(format_toc_item(item, level))
    return lines


def epub_to_markdown(epub_path):
    """
    Converts an EPUB file to Markdown format, including the table of contents and chapter contents.

    Parameters:
        - epub_path (str): Path to the EPUB file.

    Returns:
        - str: Markdown-formatted content of the EPUB. A "# Table of Contents" section comes first,
          followed by each document item (headings, paragraphs and lists), separated by "---" rules.

    Raises:
        - Exception: Any error raised while reading or converting is logged and re-raised.
    """
    try:
        logging.info(f"Converting EPUB to Markdown from {epub_path}")
        book = epub.read_epub(epub_path)

        # Collect fragments and join once at the end instead of repeated string concatenation.
        parts = ["# Table of Contents\n\n"]
        parts.extend(_toc_to_markdown(book.toc))
        parts.append("\n---\n\n")

        # Process each chapter
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            # Hand the raw bytes to BeautifulSoup (as read_epub does) so that it can detect
            # the encoding; a strict UTF-8 decode would abort the conversion on other encodings.
            soup = BeautifulSoup(item.get_content(), 'html.parser')

            # Extract chapter title
            title = soup.find(['h1', 'h2', 'h3'])
            if title:
                parts.append(f"# {title.get_text()}\n\n")

            # Process chapter content
            for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
                if elem is title:
                    # Already emitted above as the chapter heading; do not repeat it.
                    continue
                if elem.name.startswith('h'):
                    level = int(elem.name[1])
                    parts.append(f"{'#' * level} {elem.get_text()}\n\n")
                elif elem.name == 'p':
                    parts.append(f"{elem.get_text()}\n\n")
                elif elem.name in ['ul', 'ol']:
                    prefix = '-' if elem.name == 'ul' else '1.'
                    for li in elem.find_all('li'):
                        parts.append(f"{prefix} {li.get_text()}\n")
                    parts.append("\n")

            parts.append("---\n\n")

        logging.debug("EPUB to Markdown conversion completed.")
        return "".join(parts)

    except Exception as e:
        logging.exception(f"Error converting EPUB to Markdown: {str(e)}")
        raise
|
|
|
|
|
463 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
|
465 |
+
def format_toc_item(item, level):
    """
    Formats a table of contents item into Markdown list format.

    Parameters:
        - item (epub.Link or epub.Section): TOC item. Any other object is rendered via str().
        - level (int): Nesting level, starting at 1; each extra level indents by two spaces.

    Returns:
        - str: Markdown list line linking to the slugified title, or "" if formatting fails.
    """
    try:
        if isinstance(item, (epub.Link, epub.Section)):
            title = item.title
        else:
            title = str(item)

        # Two spaces per level: with a single space the child marker is still parsed
        # as a sibling of the parent item, so the list would not render as nested.
        indent = '  ' * (level - 1)
        return f"{indent}- [{title}](#{slugify(title)})\n"
    except Exception as e:
        logging.exception(f"Error formatting TOC item: {str(e)}")
        return ""
|
488 |
|
489 |
|
490 |
def slugify(text):
    """
    Builds a lowercase, hyphen-separated slug for use in Markdown anchors.

    Parameters:
        - text (str): The text to slugify.

    Returns:
        - str: The lowercased word runs of `text` joined by single hyphens,
          with no leading or trailing hyphen.
    """
    # Split on runs of non-word characters/underscores and drop the empty
    # edge tokens left by leading or trailing separators.
    tokens = re.split(r'[\W_]+', text.lower())
    return '-'.join(token for token in tokens if token)
|
501 |
|
502 |
#
|
503 |
# End of Function Definitions
|
App_Function_Libraries/Character_Chat/Character_Chat_Lib.py
ADDED
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Character_Chat_Lib.py
|
2 |
+
# Description: Functions for character chat cards.
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import json
|
6 |
+
import logging
|
7 |
+
import io
|
8 |
+
import base64
|
9 |
+
from typing import Dict, Any, Optional, List, Tuple
|
10 |
+
#
|
11 |
+
# External Imports
|
12 |
+
from PIL import Image
|
13 |
+
#
|
14 |
+
# Local imports
|
15 |
+
from App_Function_Libraries.DB.DB_Manager import get_character_card_by_id, get_character_chat_by_id
|
16 |
+
#
|
17 |
+
# Constants
|
18 |
+
####################################################################################################
|
19 |
+
#
|
20 |
+
# Functions
|
21 |
+
|
22 |
+
# Using https://github.com/malfoyslastname/character-card-spec-v2 as the standard for v2 character cards
|
23 |
+
|
24 |
+
#################################################################################
|
25 |
+
#
|
26 |
+
# Placeholder functions:
|
27 |
+
|
28 |
+
def replace_placeholders(text: str, char_name: str, user_name: str) -> str:
    """
    Replace placeholders in the given text with appropriate values.

    Args:
        text (str): The text containing placeholders. Empty or non-string values
            are returned unchanged.
        char_name (str): The name of the character; None is treated as an empty string.
        user_name (str): The name of the user; falls back to "User" when empty or None,
            matching replace_user_placeholder.

    Returns:
        str: The text with '{{char}}', '{{user}}' and '{{random_user}}' replaced.
    """
    # Card fields may be missing/None; there is nothing to substitute in that case.
    if not text or not isinstance(text, str):
        return text

    char_name = "" if char_name is None else str(char_name)
    user_name = str(user_name) if user_name else "User"

    replacements = {
        '{{char}}': char_name,
        '{{user}}': user_name,
        '{{random_user}}': user_name  # Assuming random_user is the same as user for simplicity
    }

    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)

    return text
|
50 |
+
|
51 |
+
def replace_user_placeholder(history, user_name):
    """
    Substitutes the actual user name for every '{{user}}' placeholder in a chat history.

    Args:
        history (list): Chat history as a list of (user_message, bot_message) tuples.
        user_name (str): The name entered by the user; "User" is used when it is empty.

    Returns:
        list: A new list of (user_message, bot_message) tuples with placeholders replaced.
            Empty or None messages are passed through untouched.
    """
    display_name = user_name if user_name else "User"  # Default name if none provided

    def _fill(message):
        # Falsy messages (None, "") have nothing to replace and are kept as-is.
        return message.replace("{{user}}", display_name) if message else message

    return [(_fill(user_msg), _fill(bot_msg)) for user_msg, bot_msg in history]
|
75 |
+
|
76 |
+
#
|
77 |
+
# End of Placeholder functions
|
78 |
+
#################################################################################
|
79 |
+
|
80 |
+
#################################################################################
|
81 |
+
#
|
82 |
+
# Character loading and parsing functions:
|
83 |
+
|
84 |
+
def extract_character_id(choice: str) -> int:
    """
    Extract the character ID from the dropdown selection string.

    Args:
        choice (str): Selection text containing an '(ID: <number>)' marker.

    Returns:
        int: The number following the first '(ID: ' marker.

    Raises:
        IndexError: If the string has no '(ID: ' marker.
        ValueError: If the text after the marker is not an integer.
    """
    segments = choice.split('(ID: ')
    id_text = segments[1].rstrip(')')
    return int(id_text)
|
87 |
+
|
88 |
+
def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Wrapper function to load character and image using the extracted ID.

    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, forwarded to load_character_and_image.

    Returns:
        Tuple: The (character data, chat history, image) triple produced by
            load_character_and_image.
    """
    loaded = load_character_and_image(character_id, user_name)
    character, history, portrait = loaded
    return character, history, portrait
|
92 |
+
|
93 |
+
def parse_character_book(book_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the character book section of a V2 character card.

    Args:
        book_data (Dict[str, Any]): The raw character book data from the character card.

    Returns:
        Dict[str, Any]: The book with every known field present (missing optional
            fields filled with defaults) and each entry normalized the same way.

    Raises:
        KeyError: If an entry lacks 'keys', 'content', 'enabled' or 'insertion_order'.
    """

    def _normalize_entry(raw: Dict[str, Any]) -> Dict[str, Any]:
        # 'keys', 'content', 'enabled' and 'insertion_order' are accessed directly,
        # so they are mandatory; everything else falls back to a default.
        return {
            'keys': raw['keys'],
            'content': raw['content'],
            'extensions': raw.get('extensions', {}),
            'enabled': raw['enabled'],
            'insertion_order': raw['insertion_order'],
            'case_sensitive': raw.get('case_sensitive', False),
            'name': raw.get('name', ''),
            'priority': raw.get('priority'),
            'id': raw.get('id'),
            'comment': raw.get('comment', ''),
            'selective': raw.get('selective', False),
            'secondary_keys': raw.get('secondary_keys', []),
            'constant': raw.get('constant', False),
            'position': raw.get('position')
        }

    return {
        'name': book_data.get('name', ''),
        'description': book_data.get('description', ''),
        'scan_depth': book_data.get('scan_depth'),
        'token_budget': book_data.get('token_budget'),
        'recursive_scanning': book_data.get('recursive_scanning', False),
        'extensions': book_data.get('extensions', {}),
        'entries': [_normalize_entry(raw) for raw in book_data.get('entries', [])]
    }
|
133 |
+
|
134 |
+
def load_character_and_image(character_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Fetch a character card, build its opening chat history and decode its image.

    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, passed to ``replace_placeholders``.

    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
            ``(character data, chat history, image)``. On a missing character or
            any unexpected error the result is ``(None, [], None)``. The image is
            ``None`` when the card has none or it cannot be decoded.
    """
    try:
        card = get_character_card_by_id(character_id)
        if not card:
            logging.warning(f"No character data found for ID: {character_id}")
            return None, [], None

        # Fill in placeholders on every templated text field the card carries.
        templated_fields = ('first_mes', 'mes_example', 'scenario', 'description', 'personality')
        for key in templated_fields:
            if key in card:
                card[key] = replace_placeholders(card[key], card['name'], user_name)

        # The greeting seeds the history; a stock line is used only when the
        # card has no 'first_mes' key at all.
        greeting = replace_placeholders(
            card.get('first_mes', "Hello! I'm ready to chat."),
            card['name'],
            user_name,
        )
        history = [(None, greeting)] if greeting else []

        portrait = None
        if card.get('image'):
            # The stored image is base64 text; a decoding failure is logged, not raised.
            try:
                raw_bytes = base64.b64decode(card['image'])
                portrait = Image.open(io.BytesIO(raw_bytes)).convert("RGBA")
            except Exception as e:
                logging.error(f"Error processing image for character '{card['name']}': {e}")

        return card, history, portrait

    except Exception as e:
        logging.error(f"Error in load_character_and_image: {e}")
        return None, [], None
|
176 |
+
|
177 |
+
def load_chat_and_character(chat_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
    """
    Load a saved chat together with the character it belongs to.

    Args:
        chat_id (int): The ID of the chat to load.
        user_name (str): The name of the user.

    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
            ``(character data, processed chat history, image)``. A missing chat
            or an unexpected error gives ``(None, [], None)``. A chat whose
            character is missing gives ``(None, <unprocessed history>, None)``.
    """
    try:
        chat_record = get_character_chat_by_id(chat_id)
        if not chat_record:
            logging.warning(f"No chat found with ID: {chat_id}")
            return None, [], None

        card = get_character_card_by_id(chat_record['character_id'])
        if not card:
            logging.warning(f"No character found for chat ID: {chat_id}")
            # Without a character the stored history is returned as-is.
            return None, chat_record['chat_history'], None

        history = process_chat_history(chat_record['chat_history'], card['name'], user_name)

        portrait = None
        if card.get('image'):
            # The stored image is base64 text; a decoding failure is logged, not raised.
            try:
                raw_bytes = base64.b64decode(card['image'])
                portrait = Image.open(io.BytesIO(raw_bytes)).convert("RGBA")
            except Exception as e:
                logging.error(f"Error processing image for character '{card['name']}': {e}")

        # Fill in placeholders on every templated text field the card carries.
        templated_fields = ('first_mes', 'mes_example', 'scenario', 'description', 'personality')
        for key in templated_fields:
            if key in card:
                card[key] = replace_placeholders(card[key], card['name'], user_name)

        return card, history, portrait

    except Exception as e:
        logging.error(f"Error in load_chat_and_character: {e}")
        return None, [], None
|
225 |
+
|
226 |
+
def extract_json_from_image(image_file):
    """
    Extract embedded character-card JSON text from an image.

    The image's ``chara`` metadata entry is tried first and is expected to be
    base64-encoded UTF-8 text. If it is absent or cannot be decoded, the image
    is re-encoded as PNG and the bytes are scanned for the outermost ``{...}``
    span, which is returned only if it parses as JSON.

    Args:
        image_file: Anything ``Image.open`` accepts. A ``.name`` attribute is
            used for logging when present but is not required.

    Returns:
        Optional[str]: The JSON text, or ``None`` if nothing usable was found or
        an error occurred (errors are logged, not raised).
    """
    # Paths and in-memory streams have no ``.name``; fall back to the object
    # itself so the log line cannot raise before the try block is entered.
    source_label = getattr(image_file, 'name', image_file)
    logging.debug(f"Attempting to extract JSON from image: {source_label}")
    try:
        with Image.open(image_file) as img:
            logging.debug("Image opened successfully")
            metadata = img.info
            if 'chara' in metadata:
                logging.debug("Found 'chara' in image metadata")
                chara_content = metadata['chara']
                logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
                try:
                    decoded_content = base64.b64decode(chara_content).decode('utf-8')
                    logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
                    return decoded_content
                except Exception as e:
                    logging.error(f"Error decoding base64 content: {e}")
                    logging.warning("Could not decode 'chara' metadata, attempting to find JSON data in image bytes")
            else:
                logging.warning("'chara' not found in metadata, attempting to find JSON data in image bytes")

            # Alternative method to extract embedded JSON from image bytes if metadata is not available
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG')
            img_bytes = img_byte_arr.getvalue()
            img_str = img_bytes.decode('latin1')  # Use 'latin1' to preserve byte values

            # Search for JSON-like structures in the image bytes
            json_start = img_str.find('{')
            json_end = img_str.rfind('}')
            if json_start != -1 and json_end > json_start:
                possible_json = img_str[json_start:json_end + 1]
                try:
                    json.loads(possible_json)
                    logging.debug("Found JSON data in image bytes")
                    return possible_json
                except json.JSONDecodeError:
                    logging.debug("No valid JSON found in image bytes")

            logging.warning("No JSON data found in the image")
    except Exception as e:
        logging.error(f"Error extracting JSON from image: {e}")
    return None
|
266 |
+
|
267 |
+
|
268 |
+
|
269 |
+
def load_chat_history(file):
    """
    Read an exported chat file and return its history and character name.

    Args:
        file: A binary file-like object whose ``read()`` returns UTF-8 encoded JSON.

    Returns:
        Tuple: ``(history, character_name)``. The history is taken from the
        'history' key, falling back to 'messages'; the name from 'character',
        falling back to 'character_name'. ``(None, None)`` is returned when
        either is missing or empty, or when the file cannot be read or parsed.
    """
    try:
        payload = json.loads(file.read().decode('utf-8'))

        # Each value may live under either of two keys.
        messages = payload.get('history') or payload.get('messages')
        speaker = payload.get('character') or payload.get('character_name')

        if messages and speaker:
            return messages, speaker

        logging.error("Chat history or character name missing in the imported file.")
        return None, None
    except Exception as e:
        logging.error(f"Error loading chat history: {e}")
        return None, None
|
286 |
+
|
287 |
+
|
288 |
+
def process_chat_history(chat_history: List[Tuple[str, str]], char_name: str, user_name: str) -> List[Tuple[str, str]]:
    """
    Run ``replace_placeholders`` over every non-empty message of a chat history.

    Args:
        chat_history (List[Tuple[str, str]]): Pairs of (user message, character message).
        char_name (str): The name of the character.
        user_name (str): The name of the user.

    Returns:
        List[Tuple[str, str]]: A new list of pairs; falsy messages (``None`` or
        the empty string) are passed through untouched.
    """
    def _fill(message):
        # Falsy messages are not sent through the placeholder helper.
        if message:
            return replace_placeholders(message, char_name, user_name)
        return message

    return [(_fill(from_user), _fill(from_char)) for from_user, from_char in chat_history]
|
308 |
+
|
309 |
+
def validate_character_book(book_data):
    """
    Validate the 'character_book' field in the character card.

    Args:
        book_data (dict): The character book data. Any other type is reported
            as invalid rather than raising.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    # Membership tests below would raise on None or numbers.
    if not isinstance(book_data, dict):
        return False, ["Field 'character_book' must be a dictionary."]

    def _type_label(expected):
        # Render 'int or float' instead of "(<class 'int'>, <class 'float'>)".
        kinds = expected if isinstance(expected, tuple) else (expected,)
        return " or ".join(kind.__name__ for kind in kinds)

    validation_messages = []

    # Optional fields with expected types ('entries' is checked separately
    # below, so a bad value is reported exactly once).
    optional_fields = {
        'name': str,
        'description': str,
        'scan_depth': (int, float),
        'token_budget': (int, float),
        'recursive_scanning': bool,
        'extensions': dict,
    }

    for field, expected_type in optional_fields.items():
        if field in book_data and not isinstance(book_data[field], expected_type):
            validation_messages.append(
                f"Field 'character_book.{field}' must be of type '{_type_label(expected_type)}'."
            )

    # 'entries' is required; without a list there is nothing further to check.
    entries = book_data.get('entries')
    if not isinstance(entries, list):
        validation_messages.append("Field 'character_book.entries' is required and must be a list.")
        return False, validation_messages

    # The same set is shared across entries so duplicate ids can be detected.
    entry_ids = set()
    for idx, entry in enumerate(entries):
        is_valid_entry, entry_messages = validate_character_book_entry(entry, idx, entry_ids)
        if not is_valid_entry:
            validation_messages.extend(entry_messages)

    return not validation_messages, validation_messages
|
351 |
+
|
352 |
+
def validate_character_book_entry(entry, idx, entry_ids):
    """
    Validate an entry in the 'character_book.entries' list.

    Args:
        entry (dict): The entry data. Any other type is reported as invalid
            rather than raising.
        idx (int): The index of the entry in the list.
        entry_ids (set): A set of existing entry IDs for uniqueness checking.
            Each previously unseen hashable id is added to it.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    # Membership tests below would raise on None or numbers.
    if not isinstance(entry, dict):
        return False, [f"Entry {idx}: Must be a dictionary."]

    def _type_label(expected):
        # Render 'int or float' instead of "(<class 'int'>, <class 'float'>)".
        kinds = expected if isinstance(expected, tuple) else (expected,)
        return " or ".join(kind.__name__ for kind in kinds)

    validation_messages = []
    required_fields = {
        'keys': list,
        'content': str,
        'extensions': dict,
        'enabled': bool,
        'insertion_order': (int, float)
    }

    for field, expected_type in required_fields.items():
        if field not in entry:
            validation_messages.append(f"Entry {idx}: Missing required field '{field}'.")
        elif not isinstance(entry[field], expected_type):
            validation_messages.append(
                f"Entry {idx}: Field '{field}' must be of type '{_type_label(expected_type)}'."
            )
        elif field == 'content' and not entry[field].strip():
            validation_messages.append(f"Entry {idx}: Field 'content' cannot be empty.")
        elif field == 'keys' and not entry[field]:
            validation_messages.append(f"Entry {idx}: Field 'keys' cannot be empty.")

    # Optional fields
    optional_fields = {
        'case_sensitive': bool,
        'name': str,
        'priority': (int, float),
        'id': (int, float),
        'comment': str,
        'selective': bool,
        'secondary_keys': list,
        'constant': bool,
        'position': str  # Should be 'before_char' or 'after_char'
    }

    for field, expected_type in optional_fields.items():
        if field in entry and not isinstance(entry[field], expected_type):
            validation_messages.append(
                f"Entry {idx}: Field '{field}' must be of type '{_type_label(expected_type)}'."
            )

    # Validate 'position' value if present
    if 'position' in entry and entry['position'] not in ['before_char', 'after_char']:
        validation_messages.append(f"Entry {idx}: Field 'position' must be 'before_char' or 'after_char'.")

    # Validate 'secondary_keys' if 'selective' is True
    if entry.get('selective', False):
        if 'secondary_keys' not in entry or not isinstance(entry['secondary_keys'], list):
            validation_messages.append(f"Entry {idx}: 'secondary_keys' must be a list when 'selective' is True.")
        elif not entry['secondary_keys']:
            validation_messages.append(f"Entry {idx}: 'secondary_keys' cannot be empty when 'selective' is True.")

    # Validate that 'keys' and 'secondary_keys' hold only non-empty strings
    for list_field in ('keys', 'secondary_keys'):
        if list_field in entry and isinstance(entry[list_field], list):
            for i, key in enumerate(entry[list_field]):
                if not isinstance(key, str) or not key.strip():
                    validation_messages.append(
                        f"Entry {idx}: Element {i} in '{list_field}' must be a non-empty string."
                    )

    # Validate 'id' uniqueness
    if 'id' in entry:
        entry_id = entry['id']
        try:
            if entry_id in entry_ids:
                validation_messages.append(
                    f"Entry {idx}: Duplicate 'id' value '{entry_id}'. Each entry 'id' must be unique."
                )
            else:
                entry_ids.add(entry_id)
        except TypeError:
            # Unhashable id (e.g. a list): already reported by the type check
            # above, so only the uniqueness bookkeeping is skipped.
            pass

    # Validate 'extensions' keys are namespaced
    if 'extensions' in entry and isinstance(entry['extensions'], dict):
        for key in entry['extensions']:
            # A non-string key cannot carry a namespace, so it is reported too.
            if not isinstance(key, str) or ('/' not in key and '_' not in key):
                validation_messages.append(
                    f"Entry {idx}: Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts."
                )

    return not validation_messages, validation_messages
|
442 |
+
|
443 |
+
def validate_v2_card(card_data):
    """
    Validate a character card according to the V2 specification.

    Args:
        card_data (dict): The parsed character card data. A non-dict card, a
            non-dict 'data' field or a non-numeric 'spec_version' is reported
            as a validation failure rather than raising.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    if not isinstance(card_data, dict):
        return False, ["Character card must be a dictionary."]

    validation_messages = []

    # Check top-level fields
    if 'spec' not in card_data:
        validation_messages.append("Missing 'spec' field.")
    elif card_data['spec'] != 'chara_card_v2':
        validation_messages.append(f"Invalid 'spec' value: {card_data['spec']}. Expected 'chara_card_v2'.")

    if 'spec_version' not in card_data:
        validation_messages.append("Missing 'spec_version' field.")
    else:
        # Ensure 'spec_version' is '2.0' or higher. float() raises TypeError
        # (not ValueError) for values such as None or a list, so catch both.
        try:
            spec_version = float(card_data['spec_version'])
        except (TypeError, ValueError):
            validation_messages.append(
                f"Invalid 'spec_version' format: {card_data['spec_version']}. Must be a number as a string."
            )
        else:
            if spec_version < 2.0:
                validation_messages.append(
                    f"'spec_version' must be '2.0' or higher. Found '{card_data['spec_version']}'."
                )

    if 'data' not in card_data:
        validation_messages.append("Missing 'data' field.")
        return False, validation_messages  # Cannot proceed without 'data' field

    data = card_data['data']
    if not isinstance(data, dict):
        validation_messages.append("Field 'data' must be a dictionary.")
        return False, validation_messages  # Field checks below need a mapping

    # Required fields in 'data'
    required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
    for field in required_fields:
        if field not in data:
            validation_messages.append(f"Missing required field in 'data': '{field}'.")
        elif not isinstance(data[field], str):
            validation_messages.append(f"Field '{field}' must be a string.")
        elif not data[field].strip():
            validation_messages.append(f"Field '{field}' cannot be empty.")

    # Optional fields with expected types
    optional_fields = {
        'creator_notes': str,
        'system_prompt': str,
        'post_history_instructions': str,
        'alternate_greetings': list,
        'tags': list,
        'creator': str,
        'character_version': str,
        'extensions': dict,
        'character_book': dict  # If present, should be a dict
    }

    for field, expected_type in optional_fields.items():
        if field not in data:
            continue
        if not isinstance(data[field], expected_type):
            validation_messages.append(f"Field '{field}' must be of type '{expected_type.__name__}'.")
        elif field == 'extensions':
            # Validate that extensions keys are properly namespaced
            for key in data[field]:
                if '/' not in key and '_' not in key:
                    validation_messages.append(
                        f"Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts."
                    )

    # 'alternate_greetings' and 'tags', when lists, must hold non-empty strings
    for list_field in ('alternate_greetings', 'tags'):
        if list_field in data and isinstance(data[list_field], list):
            for idx, item in enumerate(data[list_field]):
                if not isinstance(item, str) or not item.strip():
                    validation_messages.append(f"Element {idx} in '{list_field}' must be a non-empty string.")

    # Validate 'character_book' if present. A non-dict value was already
    # reported by the type check above and is not validated further.
    if isinstance(data.get('character_book'), dict):
        is_valid_book, book_messages = validate_character_book(data['character_book'])
        if not is_valid_book:
            validation_messages.extend(book_messages)

    return not validation_messages, validation_messages
|
538 |
+
|
539 |
+
#
|
540 |
+
# End of File
|
541 |
+
####################################################################################################
|
App_Function_Libraries/Character_Chat/__init__.py
ADDED
File without changes
|
App_Function_Libraries/DB/Character_Chat_DB.py
CHANGED
@@ -1,684 +1,701 @@
|
|
1 |
-
# character_chat_db.py
|
2 |
-
# Database functions for managing character cards and chat histories.
|
3 |
-
# #
|
4 |
-
# Imports
|
5 |
-
import configparser
|
6 |
-
import sqlite3
|
7 |
-
import json
|
8 |
-
import os
|
9 |
-
import sys
|
10 |
-
from typing import List, Dict, Optional, Tuple, Any, Union
|
11 |
-
|
12 |
-
from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
|
13 |
-
import logging
|
14 |
-
|
15 |
-
#
|
16 |
-
#######################################################################################################################
|
17 |
-
#
|
18 |
-
#
|
19 |
-
|
20 |
-
def ensure_database_directory():
    """Create the directory returned by ``get_database_dir()`` if it is missing."""
    database_dir = get_database_dir()
    # exist_ok makes this a no-op when the directory is already there.
    os.makedirs(database_dir, exist_ok=True)
|
22 |
-
|
23 |
-
# Runs at import time, before any database path below is used.
ensure_database_directory()


# Construct the path to the config file
config_path = get_project_relative_path('Config_Files/config.txt')

# Read the config file
config = configparser.ConfigParser()
config.read(config_path)

# Get the chat db path from the config ([Database] chatDB_path), or fall back
# to get_database_path('chatDB.db') if the option is not specified.
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
# NOTE(review): this prints on every import of the module.
print(f"Chat Database path: {chat_DB_PATH}")
|
36 |
-
|
37 |
-
########################################################################################################
|
38 |
-
#
|
39 |
-
# Functions
|
40 |
-
|
41 |
-
# FIXME - Setup properly and test/add documentation for its existence...
|
42 |
-
def initialize_database():
    """Initialize the SQLite database with required tables and FTS5 virtual tables.

    Creates (if missing) the CharacterCards, CharacterChats and ChatKeywords
    tables, the CharacterChats_fts external-content FTS5 index, the triggers
    that keep that index in sync, and the keyword indexes.

    The delete/update triggers use FTS5's special 'delete' command with the
    OLD column values. A plain DELETE/UPDATE on an external-content FTS5 table
    cannot work from an AFTER trigger: the content row has already changed, so
    the old tokens are left behind in the index. Databases that still carry
    the earlier plain-DELETE/UPDATE triggers have them replaced and the index
    rebuilt once.

    Raises:
        sqlite3.Error: If SQLite reports an error (after rollback).
        Exception: Any other failure is logged, rolled back and re-raised.
    """
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Enable foreign key constraints (applies to this connection only)
        cursor.execute("PRAGMA foreign_keys = ON;")

        # Create CharacterCards table with V2 fields
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterCards (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                personality TEXT,
                scenario TEXT,
                image BLOB,
                post_history_instructions TEXT,
                first_mes TEXT,
                mes_example TEXT,
                creator_notes TEXT,
                system_prompt TEXT,
                alternate_greetings TEXT,
                tags TEXT,
                creator TEXT,
                character_version TEXT,
                extensions TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
        """)

        # Create CharacterChats table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterChats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                character_id INTEGER NOT NULL,
                conversation_name TEXT,
                chat_history TEXT,
                is_snapshot BOOLEAN DEFAULT FALSE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
            );
        """)

        # Create FTS5 virtual table for CharacterChats
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
                conversation_name,
                chat_history,
                content='CharacterChats',
                content_rowid='id'
            );
        """)

        # Detect delete/update triggers created by the earlier definition
        # (they do not use the 'delete' command) so they can be replaced.
        cursor.execute("""
            SELECT COUNT(*) FROM sqlite_master
            WHERE type = 'trigger'
              AND name IN ('CharacterChats_ad', 'CharacterChats_au')
              AND sql NOT LIKE '%''delete''%'
        """)
        has_legacy_triggers = cursor.fetchone()[0] > 0
        if has_legacy_triggers:
            cursor.executescript("""
                DROP TRIGGER IF EXISTS CharacterChats_ad;
                DROP TRIGGER IF EXISTS CharacterChats_au;
            """)

        # Create triggers to keep FTS5 table in sync with CharacterChats
        cursor.executescript("""
            CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;
        """)

        if has_legacy_triggers:
            # The old triggers may have left stale tokens behind; rebuild the
            # index from the content table once so the new triggers start
            # from a consistent state.
            cursor.execute("INSERT INTO CharacterChats_fts(CharacterChats_fts) VALUES('rebuild');")

        # Create ChatKeywords table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS ChatKeywords (
                chat_id INTEGER NOT NULL,
                keyword TEXT NOT NULL,
                FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
            );
        """)

        # Create indexes for faster searches
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
        """)

        conn.commit()
        logging.info("Database initialized successfully.")
    except sqlite3.Error as e:
        logging.error(f"SQLite error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    except Exception as e:
        logging.error(f"Unexpected error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()
|
147 |
-
|
148 |
-
# Call initialize_database() at the start of your application
|
149 |
-
def setup_chat_database():
    """Run ``initialize_database`` and exit the process with status 1 if it fails."""
    try:
        initialize_database()
    except Exception as init_error:
        # A failed schema setup is logged as critical and the process exits.
        logging.critical(f"Failed to initialize database: {init_error}")
        sys.exit(1)
|
155 |
-
|
156 |
-
# Import-time side effect: set up the chat database schema. A failure exits
# the process with status 1.
setup_chat_database()
|
157 |
-
|
158 |
-
########################################################################################################
|
159 |
-
#
|
160 |
-
# Character Card handling
|
161 |
-
|
162 |
-
def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a character card into the column values stored for it.

    Text fields default to the empty string. 'alternate_greetings', 'tags' and
    'extensions' are serialised to JSON text (defaulting to an empty list,
    list and object respectively). 'image' is copied through only when the
    input has that key.
    """
    leading_text_fields = (
        'name', 'description', 'personality', 'scenario', 'first_mes',
        'mes_example', 'creator_notes', 'system_prompt', 'post_history_instructions',
    )
    v2_data = {field: card_data.get(field, '') for field in leading_text_fields}

    # Structured values are stored as JSON strings.
    v2_data['alternate_greetings'] = json.dumps(card_data.get('alternate_greetings', []))
    v2_data['tags'] = json.dumps(card_data.get('tags', []))

    for field in ('creator', 'character_version'):
        v2_data[field] = card_data.get(field, '')

    v2_data['extensions'] = json.dumps(card_data.get('extensions', {}))

    # Handle 'image' separately as it might be binary data
    if 'image' in card_data:
        v2_data['image'] = card_data['image']

    return v2_data
|
186 |
-
|
187 |
-
|
188 |
-
def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
    """Add or update a character card in the database.

    The card is matched by name: an existing row is updated, otherwise a new
    row is inserted.

    Args:
        card_data: The character card fields; passed through
            ``parse_character_card`` before being stored.

    Returns:
        The ID of the inserted or updated character, or None if the operation
        failed (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        parsed_card = parse_character_card(card_data)
        # parse_character_card only sets 'image' when the input carried one.
        # Subscripting it raised KeyError for image-less cards, which was
        # swallowed below and made such cards impossible to store.
        image = parsed_card.get('image')

        # Check if character already exists
        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
        row = cursor.fetchone()

        if row:
            # Update existing character. COALESCE keeps the stored image when
            # the incoming card does not supply one.
            character_id = row[0]
            update_query = """
                UPDATE CharacterCards
                SET description = ?, personality = ?, scenario = ?, image = COALESCE(?, image),
                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
                    tags = ?, creator = ?, character_version = ?, extensions = ?
                WHERE id = ?
            """
            cursor.execute(update_query, (
                parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
                image, parsed_card['post_history_instructions'], parsed_card['first_mes'],
                parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
                parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
                parsed_card['character_version'], parsed_card['extensions'], character_id
            ))
        else:
            # Insert new character
            insert_query = """
                INSERT INTO CharacterCards (name, description, personality, scenario, image,
                post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
                alternate_greetings, tags, creator, character_version, extensions)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
            cursor.execute(insert_query, (
                parsed_card['name'], parsed_card['description'], parsed_card['personality'],
                parsed_card['scenario'], image, parsed_card['post_history_instructions'],
                parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
                parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
                parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
            ))
            character_id = cursor.lastrowid

        conn.commit()
        return character_id
    except sqlite3.IntegrityError as e:
        logging.error(f"Error adding character card: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error adding character card: {e}")
        return None
    finally:
        conn.close()
|
244 |
-
|
245 |
-
# def add_character_card(card_data: Dict) -> Optional[int]:
|
246 |
-
# """Add or update a character card in the database.
|
247 |
-
#
|
248 |
-
# Returns the ID of the inserted character or None if failed.
|
249 |
-
# """
|
250 |
-
# conn = sqlite3.connect(chat_DB_PATH)
|
251 |
-
# cursor = conn.cursor()
|
252 |
-
# try:
|
253 |
-
# # Ensure all required fields are present
|
254 |
-
# required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
|
255 |
-
# for field in required_fields:
|
256 |
-
# if field not in card_data:
|
257 |
-
# card_data[field] = '' # Assign empty string if field is missing
|
258 |
-
#
|
259 |
-
# # Check if character already exists
|
260 |
-
# cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
|
261 |
-
# row = cursor.fetchone()
|
262 |
-
#
|
263 |
-
# if row:
|
264 |
-
# # Update existing character
|
265 |
-
# character_id = row[0]
|
266 |
-
# cursor.execute("""
|
267 |
-
# UPDATE CharacterCards
|
268 |
-
# SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
|
269 |
-
# WHERE id = ?
|
270 |
-
# """, (
|
271 |
-
# card_data['description'],
|
272 |
-
# card_data['personality'],
|
273 |
-
# card_data['scenario'],
|
274 |
-
# card_data['image'],
|
275 |
-
# card_data['post_history_instructions'],
|
276 |
-
# card_data['first_message'],
|
277 |
-
# character_id
|
278 |
-
# ))
|
279 |
-
# else:
|
280 |
-
# # Insert new character
|
281 |
-
# cursor.execute("""
|
282 |
-
# INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
|
283 |
-
# VALUES (?, ?, ?, ?, ?, ?, ?)
|
284 |
-
# """, (
|
285 |
-
# card_data['name'],
|
286 |
-
# card_data['description'],
|
287 |
-
# card_data['personality'],
|
288 |
-
# card_data['scenario'],
|
289 |
-
# card_data['image'],
|
290 |
-
# card_data['post_history_instructions'],
|
291 |
-
# card_data['first_message']
|
292 |
-
# ))
|
293 |
-
# character_id = cursor.lastrowid
|
294 |
-
#
|
295 |
-
# conn.commit()
|
296 |
-
# return cursor.lastrowid
|
297 |
-
# except sqlite3.IntegrityError as e:
|
298 |
-
# logging.error(f"Error adding character card: {e}")
|
299 |
-
# return None
|
300 |
-
# except Exception as e:
|
301 |
-
# logging.error(f"Unexpected error adding character card: {e}")
|
302 |
-
# return None
|
303 |
-
# finally:
|
304 |
-
# conn.close()
|
305 |
-
|
306 |
-
|
307 |
-
def get_character_cards() -> List[Dict]:
    """Retrieve all character cards from the database.

    Returns:
        One dictionary per CharacterCards row, keyed by column name.

    Raises:
        sqlite3.Error: If the query fails; the connection is closed first.
    """
    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
|
319 |
-
|
320 |
-
|
321 |
-
def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
322 |
-
"""
|
323 |
-
Retrieve a single character card by its ID.
|
324 |
-
|
325 |
-
Args:
|
326 |
-
character_id: Can be either an integer ID or a dictionary containing character data.
|
327 |
-
|
328 |
-
Returns:
|
329 |
-
A dictionary containing the character card data, or None if not found.
|
330 |
-
"""
|
331 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
332 |
-
cursor = conn.cursor()
|
333 |
-
try:
|
334 |
-
if isinstance(character_id, dict):
|
335 |
-
# If a dictionary is passed, assume it's already a character card
|
336 |
-
return character_id
|
337 |
-
elif isinstance(character_id, int):
|
338 |
-
# If an integer is passed, fetch the character from the database
|
339 |
-
cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
|
340 |
-
row = cursor.fetchone()
|
341 |
-
if row:
|
342 |
-
columns = [description[0] for description in cursor.description]
|
343 |
-
return dict(zip(columns, row))
|
344 |
-
else:
|
345 |
-
logging.warning(f"Invalid type for character_id: {type(character_id)}")
|
346 |
-
return None
|
347 |
-
except Exception as e:
|
348 |
-
logging.error(f"Error in get_character_card_by_id: {e}")
|
349 |
-
return None
|
350 |
-
finally:
|
351 |
-
conn.close()
|
352 |
-
|
353 |
-
|
354 |
-
def update_character_card(character_id: int, card_data: Dict) -> bool:
|
355 |
-
"""Update an existing character card."""
|
356 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
357 |
-
cursor = conn.cursor()
|
358 |
-
try:
|
359 |
-
cursor.execute("""
|
360 |
-
UPDATE CharacterCards
|
361 |
-
SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
|
362 |
-
WHERE id = ?
|
363 |
-
""", (
|
364 |
-
card_data.get('name'),
|
365 |
-
card_data.get('description'),
|
366 |
-
card_data.get('personality'),
|
367 |
-
card_data.get('scenario'),
|
368 |
-
card_data.get('image'),
|
369 |
-
card_data.get('post_history_instructions', ''),
|
370 |
-
card_data.get('first_message', "Hello! I'm ready to chat."),
|
371 |
-
character_id
|
372 |
-
))
|
373 |
-
conn.commit()
|
374 |
-
return cursor.rowcount > 0
|
375 |
-
except sqlite3.IntegrityError as e:
|
376 |
-
logging.error(f"Error updating character card: {e}")
|
377 |
-
return False
|
378 |
-
finally:
|
379 |
-
conn.close()
|
380 |
-
|
381 |
-
|
382 |
-
def delete_character_card(character_id: int) -> bool:
|
383 |
-
"""Delete a character card and its associated chats."""
|
384 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
385 |
-
cursor = conn.cursor()
|
386 |
-
try:
|
387 |
-
# Delete associated chats first due to foreign key constraint
|
388 |
-
cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
|
389 |
-
cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
|
390 |
-
conn.commit()
|
391 |
-
return cursor.rowcount > 0
|
392 |
-
except sqlite3.Error as e:
|
393 |
-
logging.error(f"Error deleting character card: {e}")
|
394 |
-
return False
|
395 |
-
finally:
|
396 |
-
conn.close()
|
397 |
-
|
398 |
-
|
399 |
-
def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
|
400 |
-
"""
|
401 |
-
Add a new chat history for a character, optionally associating keywords.
|
402 |
-
|
403 |
-
Args:
|
404 |
-
character_id (int): The ID of the character.
|
405 |
-
conversation_name (str): Name of the conversation.
|
406 |
-
chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
|
407 |
-
keywords (Optional[List[str]]): List of keywords to associate with this chat.
|
408 |
-
is_snapshot (bool, optional): Whether this chat is a snapshot.
|
409 |
-
|
410 |
-
Returns:
|
411 |
-
Optional[int]: The ID of the inserted chat or None if failed.
|
412 |
-
"""
|
413 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
414 |
-
cursor = conn.cursor()
|
415 |
-
try:
|
416 |
-
chat_history_json = json.dumps(chat_history)
|
417 |
-
cursor.execute("""
|
418 |
-
INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
|
419 |
-
VALUES (?, ?, ?, ?)
|
420 |
-
""", (
|
421 |
-
character_id,
|
422 |
-
conversation_name,
|
423 |
-
chat_history_json,
|
424 |
-
is_snapshot
|
425 |
-
))
|
426 |
-
chat_id = cursor.lastrowid
|
427 |
-
|
428 |
-
if keywords:
|
429 |
-
# Insert keywords into ChatKeywords table
|
430 |
-
keyword_records = [(chat_id, keyword.strip().lower()) for keyword in keywords]
|
431 |
-
cursor.executemany("""
|
432 |
-
INSERT INTO ChatKeywords (chat_id, keyword)
|
433 |
-
VALUES (?, ?)
|
434 |
-
""", keyword_records)
|
435 |
-
|
436 |
-
conn.commit()
|
437 |
-
return chat_id
|
438 |
-
except sqlite3.Error as e:
|
439 |
-
logging.error(f"Error adding character chat: {e}")
|
440 |
-
return None
|
441 |
-
finally:
|
442 |
-
conn.close()
|
443 |
-
|
444 |
-
|
445 |
-
def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
|
446 |
-
"""Retrieve all chats, or chats for a specific character if character_id is provided."""
|
447 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
448 |
-
cursor = conn.cursor()
|
449 |
-
if character_id is not None:
|
450 |
-
cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
|
451 |
-
else:
|
452 |
-
cursor.execute("SELECT * FROM CharacterChats")
|
453 |
-
rows = cursor.fetchall()
|
454 |
-
columns = [description[0] for description in cursor.description]
|
455 |
-
conn.close()
|
456 |
-
return [dict(zip(columns, row)) for row in rows]
|
457 |
-
|
458 |
-
|
459 |
-
def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
|
460 |
-
"""Retrieve a single chat by its ID."""
|
461 |
-
conn = sqlite3.connect(chat_DB_PATH)
|
462 |
-
cursor = conn.cursor()
|
463 |
-
cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
|
464 |
-
row = cursor.fetchone()
|
465 |
-
conn.close()
|
466 |
-
if row:
|
467 |
-
columns = [description[0] for description in cursor.description]
|
468 |
-
chat = dict(zip(columns, row))
|
469 |
-
chat['chat_history'] = json.loads(chat['chat_history'])
|
470 |
-
return chat
|
471 |
-
return None
|
472 |
-
|
473 |
-
|
474 |
-
def search_character_chats(query: str) -> Tuple[List[Dict], str]:
|
475 |
-
"""
|
476 |
-
Search for character chats using FTS5.
|
477 |
-
|
478 |
-
Args:
|
479 |
-
query (str): The search query.
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# character_chat_db.py
|
2 |
+
# Database functions for managing character cards and chat histories.
|
3 |
+
# #
|
4 |
+
# Imports
|
5 |
+
import configparser
|
6 |
+
import sqlite3
|
7 |
+
import json
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
from typing import List, Dict, Optional, Tuple, Any, Union
|
11 |
+
|
12 |
+
from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
|
13 |
+
from Tests.Chat_APIs.Chat_APIs_Integration_test import logging
|
14 |
+
|
15 |
+
#
|
16 |
+
#######################################################################################################################
|
17 |
+
#
|
18 |
+
#
|
19 |
+
|
20 |
+
def ensure_database_directory():
    """Create the directory returned by ``get_database_dir()`` if it is missing."""
    database_dir = get_database_dir()
    # exist_ok=True makes this a no-op when the directory is already there.
    os.makedirs(database_dir, exist_ok=True)
|
22 |
+
|
23 |
+
ensure_database_directory()
|
24 |
+
|
25 |
+
|
26 |
+
# Construct the path to the config file
|
27 |
+
config_path = get_project_relative_path('Config_Files/config.txt')
|
28 |
+
|
29 |
+
# Read the config file
|
30 |
+
config = configparser.ConfigParser()
|
31 |
+
config.read(config_path)
|
32 |
+
|
33 |
+
# Get the chat db path from the config, or use the default if not specified
|
34 |
+
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
|
35 |
+
print(f"Chat Database path: {chat_DB_PATH}")
|
36 |
+
|
37 |
+
########################################################################################################
|
38 |
+
#
|
39 |
+
# Functions
|
40 |
+
|
41 |
+
# FIXME - Setup properly and test/add documentation for its existence...
|
42 |
+
def initialize_database():
    """Create the chat database schema (tables, FTS5 index, triggers, indexes).

    Every statement is idempotent, so calling this on an already initialised
    database is safe.  ``CharacterChats_fts`` is an external-content FTS5
    index over ``CharacterChats``; the triggers that keep it in sync use the
    FTS5 ``'delete'`` command, which is the form the SQLite documentation
    prescribes for external-content tables.  Databases that still carry the
    earlier plain ``DELETE``/``UPDATE`` triggers get them replaced and their
    index rebuilt once.

    Raises:
        sqlite3.Error: If a schema statement fails.  The error is logged and
            the transaction rolled back before re-raising.
        Exception: Any other failure, handled the same way.
    """
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Foreign key enforcement is a per-connection setting in SQLite.
        cursor.execute("PRAGMA foreign_keys = ON;")

        # Create CharacterCards table with V2 fields
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterCards (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                personality TEXT,
                scenario TEXT,
                image BLOB,
                post_history_instructions TEXT,
                first_mes TEXT,
                mes_example TEXT,
                creator_notes TEXT,
                system_prompt TEXT,
                alternate_greetings TEXT,
                tags TEXT,
                creator TEXT,
                character_version TEXT,
                extensions TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
        """)

        # Create CharacterChats table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterChats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                character_id INTEGER NOT NULL,
                conversation_name TEXT,
                chat_history TEXT,
                is_snapshot BOOLEAN DEFAULT FALSE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
            );
        """)

        # Create FTS5 virtual table for CharacterChats
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
                conversation_name,
                chat_history,
                content='CharacterChats',
                content_rowid='id'
            );
        """)

        # Earlier versions created delete/update triggers that ran plain
        # DELETE/UPDATE statements against the external-content FTS table.
        # By the time an AFTER trigger runs the content row already holds the
        # new values (or is gone), so FTS5 cannot find the old tokens and the
        # index keeps stale entries.  Find such triggers so they can be
        # replaced below.
        cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type = 'trigger'
              AND name IN ('CharacterChats_ad', 'CharacterChats_au')
              AND sql NOT LIKE '%''delete''%'
        """)
        legacy_triggers = [row[0] for row in cursor.fetchall()]
        for trigger_name in legacy_triggers:
            # Names come from the fixed IN (...) list above, never from input.
            cursor.execute(f'DROP TRIGGER IF EXISTS "{trigger_name}"')

        # Create triggers to keep FTS5 table in sync with CharacterChats
        cursor.executescript("""
            CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;
        """)

        if legacy_triggers:
            # The old triggers may already have left the index inconsistent;
            # rebuild it once from the content table.
            cursor.execute(
                "INSERT INTO CharacterChats_fts(CharacterChats_fts) VALUES ('rebuild')"
            )

        # Create ChatKeywords table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS ChatKeywords (
                chat_id INTEGER NOT NULL,
                keyword TEXT NOT NULL,
                FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
            );
        """)

        # Create indexes for faster searches
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
        """)

        conn.commit()
        logging.info("Database initialized successfully.")
    except sqlite3.Error as e:
        logging.error(f"SQLite error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    except Exception as e:
        logging.error(f"Unexpected error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()
|
147 |
+
|
148 |
+
# Call initialize_database() at the start of your application
|
149 |
+
def setup_chat_database():
    """Run ``initialize_database()`` and exit the process with status 1 if it fails."""
    try:
        initialize_database()
    except Exception as init_error:
        # The schema could not be created; log it and stop the process.
        logging.critical(f"Failed to initialize database: {init_error}")
        sys.exit(1)
|
155 |
+
|
156 |
+
setup_chat_database()
|
157 |
+
|
158 |
+
########################################################################################################
|
159 |
+
#
|
160 |
+
# Character Card handling
|
161 |
+
|
162 |
+
def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a character card dict into the V2 field set stored in the DB.

    Text fields default to ``''``.  ``alternate_greetings``, ``tags`` and
    ``extensions`` are serialised to JSON strings and default to an empty
    list / list / dict.  A field that is present but explicitly ``None``
    (JSON ``null``) is treated like a missing field, so ``name`` never
    becomes ``None`` and the JSON columns never become the string ``'null'``.

    Args:
        card_data: Raw card fields keyed by V2 field name.

    Returns:
        A new dict with the normalised fields.  ``image`` is included only
        when ``card_data`` has an ``image`` key, and is passed through
        unchanged because it may be binary data.
    """
    def _field(key: str, default: Any) -> Any:
        # dict.get() only applies its default to *missing* keys; map an
        # explicit None to the default as well.
        value = card_data.get(key)
        return default if value is None else value

    v2_data = {
        'name': _field('name', ''),
        'description': _field('description', ''),
        'personality': _field('personality', ''),
        'scenario': _field('scenario', ''),
        'first_mes': _field('first_mes', ''),
        'mes_example': _field('mes_example', ''),
        'creator_notes': _field('creator_notes', ''),
        'system_prompt': _field('system_prompt', ''),
        'post_history_instructions': _field('post_history_instructions', ''),
        'alternate_greetings': json.dumps(_field('alternate_greetings', [])),
        'tags': json.dumps(_field('tags', [])),
        'creator': _field('creator', ''),
        'character_version': _field('character_version', ''),
        'extensions': json.dumps(_field('extensions', {})),
    }

    # Handle 'image' separately as it might be binary data
    if 'image' in card_data:
        v2_data['image'] = card_data['image']

    return v2_data
|
186 |
+
|
187 |
+
|
188 |
+
def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
    """Insert a character card, or update the existing card with the same name.

    The card is normalised with ``parse_character_card`` first.  Cards are
    matched by ``name``.  When updating, the stored ``image`` is replaced
    only if ``card_data`` actually carries an ``image`` key; a card without
    one leaves the stored image untouched.  When inserting, a missing image
    is stored as NULL.

    Args:
        card_data: Raw card fields keyed by V2 field name.

    Returns:
        The id of the inserted or updated row, or None if the operation
        failed (the error is logged).
    """
    # Columns written on both insert and update, in statement order.
    base_columns = [
        'description', 'personality', 'scenario', 'post_history_instructions',
        'first_mes', 'mes_example', 'creator_notes', 'system_prompt',
        'alternate_greetings', 'tags', 'creator', 'character_version', 'extensions',
    ]

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        parsed_card = parse_character_card(card_data)

        # Check if character already exists
        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
        row = cursor.fetchone()

        if row:
            # Update existing character
            character_id = row[0]
            columns = list(base_columns)
            if 'image' in parsed_card:
                columns.append('image')
            # Column names come from the fixed list above, never from input.
            assignments = ", ".join(f"{column} = ?" for column in columns)
            values = [parsed_card[column] for column in columns]
            cursor.execute(
                f"UPDATE CharacterCards SET {assignments} WHERE id = ?",
                values + [character_id],
            )
        else:
            # Insert new character
            columns = ['name'] + base_columns + ['image']
            placeholders = ", ".join("?" for _ in columns)
            # .get(): parse_character_card omits 'image' when the card has none.
            values = [parsed_card.get(column) for column in columns]
            cursor.execute(
                f"INSERT INTO CharacterCards ({', '.join(columns)}) VALUES ({placeholders})",
                values,
            )
            character_id = cursor.lastrowid

        conn.commit()
        return character_id
    except sqlite3.IntegrityError as e:
        logging.error(f"Error adding character card: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error adding character card: {e}")
        return None
    finally:
        conn.close()
|
244 |
+
|
245 |
+
# def add_character_card(card_data: Dict) -> Optional[int]:
|
246 |
+
# """Add or update a character card in the database.
|
247 |
+
#
|
248 |
+
# Returns the ID of the inserted character or None if failed.
|
249 |
+
# """
|
250 |
+
# conn = sqlite3.connect(chat_DB_PATH)
|
251 |
+
# cursor = conn.cursor()
|
252 |
+
# try:
|
253 |
+
# # Ensure all required fields are present
|
254 |
+
# required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
|
255 |
+
# for field in required_fields:
|
256 |
+
# if field not in card_data:
|
257 |
+
# card_data[field] = '' # Assign empty string if field is missing
|
258 |
+
#
|
259 |
+
# # Check if character already exists
|
260 |
+
# cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
|
261 |
+
# row = cursor.fetchone()
|
262 |
+
#
|
263 |
+
# if row:
|
264 |
+
# # Update existing character
|
265 |
+
# character_id = row[0]
|
266 |
+
# cursor.execute("""
|
267 |
+
# UPDATE CharacterCards
|
268 |
+
# SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
|
269 |
+
# WHERE id = ?
|
270 |
+
# """, (
|
271 |
+
# card_data['description'],
|
272 |
+
# card_data['personality'],
|
273 |
+
# card_data['scenario'],
|
274 |
+
# card_data['image'],
|
275 |
+
# card_data['post_history_instructions'],
|
276 |
+
# card_data['first_message'],
|
277 |
+
# character_id
|
278 |
+
# ))
|
279 |
+
# else:
|
280 |
+
# # Insert new character
|
281 |
+
# cursor.execute("""
|
282 |
+
# INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
|
283 |
+
# VALUES (?, ?, ?, ?, ?, ?, ?)
|
284 |
+
# """, (
|
285 |
+
# card_data['name'],
|
286 |
+
# card_data['description'],
|
287 |
+
# card_data['personality'],
|
288 |
+
# card_data['scenario'],
|
289 |
+
# card_data['image'],
|
290 |
+
# card_data['post_history_instructions'],
|
291 |
+
# card_data['first_message']
|
292 |
+
# ))
|
293 |
+
# character_id = cursor.lastrowid
|
294 |
+
#
|
295 |
+
# conn.commit()
|
296 |
+
# return cursor.lastrowid
|
297 |
+
# except sqlite3.IntegrityError as e:
|
298 |
+
# logging.error(f"Error adding character card: {e}")
|
299 |
+
# return None
|
300 |
+
# except Exception as e:
|
301 |
+
# logging.error(f"Unexpected error adding character card: {e}")
|
302 |
+
# return None
|
303 |
+
# finally:
|
304 |
+
# conn.close()
|
305 |
+
|
306 |
+
|
307 |
+
def get_character_cards() -> List[Dict]:
    """Retrieve all character cards from the database.

    Returns:
        One dict per CharacterCards row, keyed by column name.

    Raises:
        sqlite3.Error: If the query fails.  The connection is closed either way.
    """
    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
|
319 |
+
|
320 |
+
|
321 |
+
def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Retrieve a single character card by its ID.

    Args:
        character_id: Can be either an integer ID or a dictionary containing character data.
            A dictionary is assumed to already be a character card and is returned as-is.

    Returns:
        A dictionary containing the character card data, or None if not found,
        if ``character_id`` has an unsupported type, or if the lookup fails
        (the failure is logged).
    """
    # Decide on the argument type before touching the database, so the
    # pass-through and invalid-type paths never open a connection.
    if isinstance(character_id, dict):
        # If a dictionary is passed, assume it's already a character card
        return character_id
    if not isinstance(character_id, int):
        logging.warning(f"Invalid type for character_id: {type(character_id)}")
        return None

    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
        row = cursor.fetchone()
        if not row:
            return None
        columns = [description[0] for description in cursor.description]
        return dict(zip(columns, row))
    except Exception as e:
        logging.error(f"Error in get_character_card_by_id: {e}")
        return None
    finally:
        if conn is not None:
            conn.close()
|
352 |
+
|
353 |
+
|
354 |
+
def update_character_card(character_id: int, card_data: Dict) -> bool:
    """Update an existing character card.

    Overwrites name, description, personality, scenario, image,
    post_history_instructions and the first message of the given card.
    Fields missing from ``card_data`` are written as NULL, except
    ``post_history_instructions`` (defaults to ``''``) and the first message
    (defaults to a generic greeting).

    The first message is stored in the ``first_mes`` column, which is the
    name used by the CharacterCards schema.  It is read from
    ``card_data['first_mes']``; the older ``'first_message'`` key is still
    accepted as a fallback.

    Args:
        character_id: Primary key of the card to update.
        card_data: New field values.

    Returns:
        True if a row was updated; False if no card has that id or SQLite
        rejected the update (for example a missing or duplicate name).  The
        error is logged.
    """
    if 'first_mes' in card_data:
        first_mes = card_data['first_mes']
    else:
        first_mes = card_data.get('first_message', "Hello! I'm ready to chat.")

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute("""
            UPDATE CharacterCards
            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
            WHERE id = ?
        """, (
            card_data.get('name'),
            card_data.get('description'),
            card_data.get('personality'),
            card_data.get('scenario'),
            card_data.get('image'),
            card_data.get('post_history_instructions', ''),
            first_mes,
            character_id
        ))
        conn.commit()
        return cursor.rowcount > 0
    except sqlite3.Error as e:
        # sqlite3.Error covers IntegrityError as well as schema/operational
        # errors, matching how the other functions in this module report failure.
        logging.error(f"Error updating character card: {e}")
        return False
    finally:
        conn.close()
|
380 |
+
|
381 |
+
|
382 |
+
def delete_character_card(character_id: int) -> bool:
    """Delete a character card and its associated chats.

    Foreign key enforcement is switched on for this connection so that the
    ``ON DELETE CASCADE`` declared on ChatKeywords also removes the keywords
    of the deleted chats.  SQLite leaves enforcement off by default for each
    new connection, which would otherwise orphan those rows.

    Args:
        character_id: Primary key of the card to delete.

    Returns:
        True if a card row was deleted; False if no card has that id or
        SQLite reported an error (which is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Must run before the first DELETE: the pragma is a no-op once a
        # transaction is open.
        cursor.execute("PRAGMA foreign_keys = ON;")
        # Delete associated chats first due to foreign key constraint
        cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
        cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
        # rowcount reflects the last statement, i.e. the card deletion.
        card_deleted = cursor.rowcount > 0
        conn.commit()
        return card_deleted
    except sqlite3.Error as e:
        logging.error(f"Error deleting character card: {e}")
        return False
    finally:
        conn.close()
|
397 |
+
|
398 |
+
|
399 |
+
def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
    """
    Add a new chat history for a character, optionally associating keywords.

    Keywords are stripped and lower-cased before being stored.  Blank
    keywords are skipped and duplicates (after normalisation) are stored
    once, so the ChatKeywords table never receives empty or repeated rows
    for a chat.

    Args:
        character_id (int): The ID of the character.
        conversation_name (str): Name of the conversation.
        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
        keywords (Optional[List[str]]): List of keywords to associate with this chat.
        is_snapshot (bool, optional): Whether this chat is a snapshot.

    Returns:
        Optional[int]: The ID of the inserted chat or None if failed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        chat_history_json = json.dumps(chat_history)
        cursor.execute("""
            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
            VALUES (?, ?, ?, ?)
        """, (
            character_id,
            conversation_name,
            chat_history_json,
            is_snapshot
        ))
        chat_id = cursor.lastrowid

        if keywords:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            normalized_keywords = dict.fromkeys(
                keyword.strip().lower()
                for keyword in keywords
                if keyword and keyword.strip()
            )
            if normalized_keywords:
                # Insert keywords into ChatKeywords table
                cursor.executemany("""
                    INSERT INTO ChatKeywords (chat_id, keyword)
                    VALUES (?, ?)
                """, [(chat_id, keyword) for keyword in normalized_keywords])

        conn.commit()
        return chat_id
    except sqlite3.Error as e:
        # Nothing was committed, so closing the connection discards the
        # partial insert.
        logging.error(f"Error adding character chat: {e}")
        return None
    finally:
        conn.close()
|
443 |
+
|
444 |
+
|
445 |
+
def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
    """Retrieve all chats, or chats for a specific character if character_id is provided.

    Args:
        character_id: When not None, only chats of this character are returned.

    Returns:
        One dict per CharacterChats row, keyed by column name.  The
        ``chat_history`` value is returned as stored (a JSON string), not decoded.

    Raises:
        sqlite3.Error: If the query fails.  The connection is closed either way.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        if character_id is not None:
            cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
        else:
            cursor.execute("SELECT * FROM CharacterChats")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
|
457 |
+
|
458 |
+
|
459 |
+
def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
    """Retrieve a single chat by its ID.

    Args:
        chat_id: Primary key of the chat.

    Returns:
        The CharacterChats row as a dict keyed by column name, with
        ``chat_history`` decoded from JSON, or None if no chat has that id.
        A NULL ``chat_history`` is returned as None rather than decoded.

    Raises:
        sqlite3.Error: If the query fails.  The connection is closed either way.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
        row = cursor.fetchone()
        # Read the column names while the connection is still open.
        columns = [description[0] for description in cursor.description]
    finally:
        conn.close()

    if not row:
        return None

    chat = dict(zip(columns, row))
    if chat['chat_history'] is not None:
        # json.loads() rejects None, and the column is nullable.
        chat['chat_history'] = json.loads(chat['chat_history'])
    return chat
|
472 |
+
|
473 |
+
|
474 |
+
def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
    """
    Run a full-text (FTS5) search over character chats.

    Args:
        query (str): The search query, passed to FTS5 MATCH as given.
        character_id (Optional[int]): When not None, restrict results to this character.

    Returns:
        Tuple[List[Dict], str]: Matching chats (id, conversation_name,
        chat_history) ordered by rank, and a status message.  A blank query
        or a failed search yields an empty list with an explanatory message.
    """
    if not query.strip():
        return [], "Please enter a search query."

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        restrict_to_character = character_id is not None

        if restrict_to_character:
            # Search with character_id filter
            sql = """
                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
                FROM CharacterChats_fts
                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
                WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
                ORDER BY rank
            """
            params = (query, character_id)
        else:
            # Search without character_id filter
            sql = """
                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
                FROM CharacterChats_fts
                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
                WHERE CharacterChats_fts MATCH ?
                ORDER BY rank
            """
            params = (query,)

        cursor.execute(sql, params)
        matches = cursor.fetchall()
        column_names = [column[0] for column in cursor.description]
        results = [dict(zip(column_names, match)) for match in matches]

        if restrict_to_character:
            status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
        else:
            status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."

        return results, status_message
    except Exception as e:
        logging.error(f"Error searching chats with FTS5: {e}")
        return [], f"Error occurred during search: {e}"
    finally:
        conn.close()
|
525 |
+
|
526 |
+
def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
    """Replace the stored history of one chat.

    Args:
        chat_id: Primary key of the chat to update.
        chat_history: New history; stored as a JSON string.

    Returns:
        True if a row was updated; False if no chat has that id or SQLite
        reported an error (which is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        serialized_history = json.dumps(chat_history)
        parameters = (serialized_history, chat_id)
        cursor.execute("""
            UPDATE CharacterChats
            SET chat_history = ?
            WHERE id = ?
        """, parameters)
        conn.commit()
        was_updated = cursor.rowcount > 0
    except sqlite3.Error as e:
        logging.error(f"Error updating character chat: {e}")
        was_updated = False
    finally:
        conn.close()
    return was_updated
|
547 |
+
|
548 |
+
|
549 |
+
def delete_character_chat(chat_id: int) -> bool:
    """Delete a specific chat.

    Foreign key enforcement is switched on for this connection so that the
    ``ON DELETE CASCADE`` declared on ChatKeywords removes the chat's
    keywords as well.  SQLite leaves enforcement off by default for each new
    connection, which would otherwise orphan those rows.

    Args:
        chat_id: Primary key of the chat to delete.

    Returns:
        True if a chat row was deleted; False if no chat has that id or
        SQLite reported an error (which is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Must run before the DELETE: the pragma is a no-op once a
        # transaction is open.
        cursor.execute("PRAGMA foreign_keys = ON;")
        cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
        conn.commit()
        return cursor.rowcount > 0
    except sqlite3.Error as e:
        logging.error(f"Error deleting character chat: {e}")
        return False
    finally:
        conn.close()
|
562 |
+
|
563 |
+
def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
    """
    Look up the chats tagged with at least one of the given keywords.

    Args:
        keywords (List[str]): Keywords compared for equality against
            ``ChatKeywords.keyword``.

    Returns:
        List[int]: Distinct ``chat_id`` values from ``ChatKeywords``. An empty
        list is returned when no keywords are supplied or when any error
        occurs (the error is logged).
    """
    if not keywords:
        return []

    connection = sqlite3.connect(chat_DB_PATH)
    db_cursor = connection.cursor()
    try:
        # One bound "keyword = ?" test per requested keyword, OR-ed together.
        keyword_tests = " OR ".join("keyword = ?" for _ in keywords)
        db_cursor.execute(
            f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_tests}",
            keywords,
        )
        return [record[0] for record in db_cursor.fetchall()]
    except Exception as error:
        logging.error(f"Error in fetch_keywords_for_chats: {error}")
        return []
    finally:
        connection.close()
|
591 |
+
|
592 |
+
def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
    """Store a chat history by delegating to ``add_character_chat``.

    The three arguments are forwarded unchanged, and the delegate's result is
    returned as-is (per the annotation: the new chat's ID, or None on failure).
    """
    new_chat_id = add_character_chat(character_id, conversation_name, chat_history)
    return new_chat_id
|
598 |
+
|
599 |
+
def migrate_chat_to_media_db():
    """Placeholder with no implementation yet; calling it does nothing and returns None."""
    return None
|
601 |
+
|
602 |
+
|
603 |
+
def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on specified fields with optional filtering and pagination.

    The search runs against ``CharacterChats_fts`` joined to ``CharacterChats``
    on ``rowid = id``. The requested fields are applied as one FTS5 column
    filter (``{field1 field2} : (query)``), so a hit in any listed column
    counts as a match.

    Args:
        query (str): The search query, in FTS5 query syntax.
        fields (List[str]): Columns of ``CharacterChats_fts`` to search in. Each
            entry must be a plain identifier because it is embedded in the
            match expression.
        where_clause (str, optional): Additional SQL condition AND-ed to the
            match. It is inserted into the statement verbatim, so it must come
            from trusted code, never from user input.
        page (int, optional): 1-based page number for pagination.
        results_per_page (int, optional): Number of results per page.

    Returns:
        List[Dict[str, Any]]: One dict per matching chat with the keys ``id``,
        ``conversation_name`` and ``chat_history``. An empty list is returned
        for a blank query, invalid fields, or any error (the error is logged).
    """
    if not query.strip():
        return []

    # Field names end up inside the match expression; reject anything that is
    # not a bare identifier instead of letting it alter the statement.
    if not fields or not all(isinstance(name, str) and name.isidentifier() for name in fields):
        logging.error(f"Error in search_db: invalid search fields {fields!r}")
        return []

    # A table-level MATCH with a column filter searches every listed column and
    # avoids ambiguous bare column names, since the joined base table may use
    # the same column names as the FTS table.
    column_filter = "{" + " ".join(fields) + "}"
    match_expression = f"{column_filter} : ({query})"

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        fts_query = """
            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
            FROM CharacterChats_fts
            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
            WHERE CharacterChats_fts MATCH ?
        """
        if where_clause:
            fts_query += f" AND ({where_clause})"
        fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
        offset = (page - 1) * results_per_page
        cursor.execute(fts_query, (match_expression, results_per_page, offset))
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    except Exception as e:
        logging.error(f"Error in search_db: {e}")
        return []
    finally:
        conn.close()
|
646 |
+
|
647 |
+
|
648 |
+
def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
        List[Dict[str, Any]]:
    """
    Perform a full-text search within the specified chat IDs using FTS5.

    Args:
        query (str): The user's query.
        relevant_chat_ids (List[int]): Chat IDs (``CharacterChats.id``) to search
            within. An empty list means the search is not restricted.
        page (int): Pagination page number.
        results_per_page (int): Number of results per page.

    Returns:
        List[Dict[str, Any]]: One entry per hit, shaped as
        ``{"content": <chat_history>, "metadata": {"media_id": <chat id>}}``.
        An empty list is returned when nothing matches or an error occurs
        (the error is logged).
    """
    try:
        # The restriction is applied in SQL on CharacterChats.id, the column
        # search_db selects as "id". int() coercion keeps the interpolated list
        # free of arbitrary SQL.
        id_list = ", ".join(str(int(chat_id)) for chat_id in relevant_chat_ids)
        where_clause = f"CharacterChats.id IN ({id_list})" if id_list else "1"  # "1": no restriction

        # NOTE(review): assumes CharacterChats_fts has a column named "content" -
        # confirm against the FTS table definition.
        fts_results = search_db(query, ["content"], where_clause, page=page, results_per_page=results_per_page)

        # search_db rows carry the keys id / conversation_name / chat_history;
        # the chat history is the searchable text, so it is exposed as "content".
        return [
            {
                "content": result['chat_history'],
                "metadata": {"media_id": result['id']}
            }
            for result in fts_results
        ]
    except Exception as e:
        logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
        return []
|
683 |
+
|
684 |
+
|
685 |
+
def fetch_all_chats() -> List[Dict[str, Any]]:
    """
    Return the result of ``get_character_chats()`` called without arguments.

    Returns:
        List[Dict[str, Any]]: Whatever ``get_character_chats`` yields, or an
        empty list when that call raises (the error is logged).
    """
    try:
        # Modify get_character_chats if it should retrieve all chats.
        return get_character_chats()
    except Exception as error:
        logging.error(f"Error fetching all chats: {str(error)}")
        return []
|
698 |
+
|
699 |
+
#
|
700 |
+
# End of Character_Chat_DB.py
|
701 |
+
#######################################################################################################################
|
App_Function_Libraries/DB/DB_Manager.py
CHANGED
@@ -309,7 +309,10 @@ def add_media_to_database(*args, **kwargs):
|
|
309 |
result = sqlite_add_media_to_database(*args, **kwargs)
|
310 |
|
311 |
# Extract content
|
312 |
-
segments = args[2]
|
|
|
|
|
|
|
313 |
if isinstance(segments, list):
|
314 |
content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
|
315 |
elif isinstance(segments, dict):
|
|
|
309 |
result = sqlite_add_media_to_database(*args, **kwargs)
|
310 |
|
311 |
# Extract content
|
312 |
+
segments = kwargs.get('segments') if 'segments' in kwargs else args[2] if len(args) > 2 else None
|
313 |
+
if segments is None:
|
314 |
+
raise ValueError("Segments not provided in arguments")
|
315 |
+
|
316 |
if isinstance(segments, list):
|
317 |
content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
|
318 |
elif isinstance(segments, dict):
|
App_Function_Libraries/DB/SQLite_DB.py
CHANGED
@@ -1181,8 +1181,6 @@ def is_valid_date(date_string: str) -> bool:
|
|
1181 |
return False
|
1182 |
|
1183 |
|
1184 |
-
|
1185 |
-
|
1186 |
def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
|
1187 |
if db is None:
|
1188 |
db = Database()
|
@@ -1196,6 +1194,7 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
|
|
1196 |
url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
|
1197 |
url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
|
1198 |
|
|
|
1199 |
|
1200 |
# Extract content from segments
|
1201 |
if isinstance(segments, list):
|
@@ -1217,15 +1216,24 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
|
|
1217 |
cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
|
1218 |
existing_media = cursor.fetchone()
|
1219 |
|
|
|
|
|
|
|
1220 |
if existing_media:
|
|
|
|
|
1221 |
if overwrite:
|
1222 |
-
|
1223 |
cursor.execute('''
|
1224 |
UPDATE Media
|
1225 |
SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
|
1226 |
WHERE id = ?
|
1227 |
''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
|
1228 |
info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
|
|
|
|
|
|
|
|
|
1229 |
else:
|
1230 |
cursor.execute('''
|
1231 |
INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
|
@@ -1233,12 +1241,17 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
|
|
1233 |
''', (url, info_dict.get('title', 'Untitled'), media_type, content,
|
1234 |
info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
|
1235 |
media_id = cursor.lastrowid
|
|
|
|
|
1236 |
|
1237 |
-
|
1238 |
-
|
1239 |
-
|
1240 |
-
|
1241 |
-
|
|
|
|
|
|
|
1242 |
|
1243 |
# Process keywords
|
1244 |
for keyword in keyword_list:
|
@@ -1266,7 +1279,8 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
|
|
1266 |
schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
|
1267 |
|
1268 |
action = "updated" if existing_media and overwrite else "added"
|
1269 |
-
return f"Media '{info_dict.get('title', 'Untitled')}' {action}
|
|
|
1270 |
|
1271 |
except DatabaseError as e:
|
1272 |
logging.error(f"Database error: {e}")
|
|
|
1181 |
return False
|
1182 |
|
1183 |
|
|
|
|
|
1184 |
def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
|
1185 |
if db is None:
|
1186 |
db = Database()
|
|
|
1194 |
url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
|
1195 |
url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
|
1196 |
|
1197 |
+
logging.debug(f"Checking for existing media with URL: {url}")
|
1198 |
|
1199 |
# Extract content from segments
|
1200 |
if isinstance(segments, list):
|
|
|
1216 |
cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
|
1217 |
existing_media = cursor.fetchone()
|
1218 |
|
1219 |
+
logging.debug(f"Existing media: {existing_media}")
|
1220 |
+
logging.debug(f"Overwrite flag: {overwrite}")
|
1221 |
+
|
1222 |
if existing_media:
|
1223 |
+
media_id = existing_media[0]
|
1224 |
+
logging.debug(f"Existing media_id: {media_id}")
|
1225 |
if overwrite:
|
1226 |
+
logging.debug("Updating existing media")
|
1227 |
cursor.execute('''
|
1228 |
UPDATE Media
|
1229 |
SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
|
1230 |
WHERE id = ?
|
1231 |
''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
|
1232 |
info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
|
1233 |
+
action = "updated"
|
1234 |
+
else:
|
1235 |
+
logging.debug("Media exists but not updating (overwrite=False)")
|
1236 |
+
action = "already exists (not updated)"
|
1237 |
else:
|
1238 |
cursor.execute('''
|
1239 |
INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
|
|
|
1241 |
''', (url, info_dict.get('title', 'Untitled'), media_type, content,
|
1242 |
info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
|
1243 |
media_id = cursor.lastrowid
|
1244 |
+
action = "added"
|
1245 |
+
logging.debug(f"New media_id: {media_id}")
|
1246 |
|
1247 |
+
logging.debug(f"Before MediaModifications insert, media_id: {media_id}")
|
1248 |
+
|
1249 |
+
# Only proceed with modifications if the media was added or updated
|
1250 |
+
if action in ["updated", "added"]:
|
1251 |
+
cursor.execute('''
|
1252 |
+
INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
|
1253 |
+
VALUES (?, ?, ?, ?)
|
1254 |
+
''', (media_id, custom_prompt_input, summary, datetime.now().strftime('%Y-%m-%d')))
|
1255 |
|
1256 |
# Process keywords
|
1257 |
for keyword in keyword_list:
|
|
|
1279 |
schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
|
1280 |
|
1281 |
action = "updated" if existing_media and overwrite else "added"
|
1282 |
+
return f"Media '{info_dict.get('title', 'Untitled')}' {action} with URL: {url}" + \
|
1283 |
+
(f" and keywords: {', '.join(keyword_list)}. Chunking scheduled." if action in ["updated", "added"] else "")
|
1284 |
|
1285 |
except DatabaseError as e:
|
1286 |
logging.error(f"Database error: {e}")
|
App_Function_Libraries/Utils/Utils.py
CHANGED
@@ -15,8 +15,6 @@
|
|
15 |
# 6. normalize_title(title)
|
16 |
# 7.
|
17 |
#
|
18 |
-
#
|
19 |
-
#
|
20 |
####################
|
21 |
#
|
22 |
# Import necessary libraries
|
@@ -256,6 +254,7 @@ def load_and_log_configs():
|
|
256 |
logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
|
257 |
logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
|
258 |
|
|
|
259 |
# Retrieve output paths from the configuration file
|
260 |
output_path = config.get('Paths', 'output_path', fallback='results')
|
261 |
logging.debug(f"Output path set to: {output_path}")
|
@@ -264,6 +263,18 @@ def load_and_log_configs():
|
|
264 |
processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
|
265 |
logging.debug(f"Processing choice set to: {processing_choice}")
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
# Prompts - FIXME
|
268 |
prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
|
269 |
|
@@ -320,6 +331,16 @@ def load_and_log_configs():
|
|
320 |
'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
|
321 |
'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
|
322 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
}
|
324 |
|
325 |
except Exception as e:
|
@@ -513,31 +534,49 @@ def create_download_directory(title):
|
|
513 |
return session_path
|
514 |
|
515 |
|
|
|
|
|
|
|
516 |
def safe_read_file(file_path):
|
517 |
-
encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']
|
|
|
|
|
518 |
|
519 |
try:
|
520 |
with open(file_path, 'rb') as file:
|
521 |
raw_data = file.read()
|
522 |
except FileNotFoundError:
|
|
|
523 |
return f"File not found: {file_path}"
|
524 |
except Exception as e:
|
|
|
525 |
return f"An error occurred while reading the file: {e}"
|
526 |
|
|
|
|
|
|
|
|
|
527 |
# Use chardet to detect the encoding
|
528 |
detected = chardet.detect(raw_data)
|
529 |
if detected['encoding'] is not None:
|
530 |
encodings.insert(0, detected['encoding'])
|
|
|
531 |
|
532 |
for encoding in encodings:
|
533 |
try:
|
534 |
decoded_content = raw_data.decode(encoding)
|
535 |
-
if
|
|
|
|
|
536 |
return decoded_content
|
537 |
except UnicodeDecodeError:
|
|
|
538 |
continue
|
539 |
|
540 |
-
|
|
|
|
|
|
|
541 |
|
542 |
#
|
543 |
# End of Files-saving Function Definitions
|
|
|
15 |
# 6. normalize_title(title)
|
16 |
# 7.
|
17 |
#
|
|
|
|
|
18 |
####################
|
19 |
#
|
20 |
# Import necessary libraries
|
|
|
254 |
logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
|
255 |
logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
|
256 |
|
257 |
+
|
258 |
# Retrieve output paths from the configuration file
|
259 |
output_path = config.get('Paths', 'output_path', fallback='results')
|
260 |
logging.debug(f"Output path set to: {output_path}")
|
|
|
263 |
processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
|
264 |
logging.debug(f"Processing choice set to: {processing_choice}")
|
265 |
|
266 |
+
# Retrieve Embedding model settings from the configuration file
|
267 |
+
embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
|
268 |
+
logging.debug(f"Embedding model set to: {embedding_model}")
|
269 |
+
embedding_provider = config.get('Embeddings', 'embedding_provider', fallback='')
|
270 |
+
embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
|
271 |
+
onnx_model_path = config.get('Embeddings', 'onnx_model_path', fallback="./App_Function_Libraries/onnx_models/text-embedding-3-small.onnx")
|
272 |
+
model_dir = config.get('Embeddings', 'model_dir', fallback="./App_Function_Libraries/onnx_models")
|
273 |
+
embedding_api_url = config.get('Embeddings', 'embedding_api_url', fallback="http://localhost:8080/v1/embeddings")
|
274 |
+
embedding_api_key = config.get('Embeddings', 'embedding_api_key', fallback='')
|
275 |
+
chunk_size = config.get('Embeddings', 'chunk_size', fallback=400)
|
276 |
+
overlap = config.get('Embeddings', 'overlap', fallback=200)
|
277 |
+
|
278 |
# Prompts - FIXME
|
279 |
prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
|
280 |
|
|
|
331 |
'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
|
332 |
'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
|
333 |
},
|
334 |
+
'embedding_config': {
|
335 |
+
'embedding_provider': embedding_provider,
|
336 |
+
'embedding_model': embedding_model,
|
337 |
+
'onnx_model_path': onnx_model_path,
|
338 |
+
'model_dir': model_dir,
|
339 |
+
'embedding_api_url': embedding_api_url,
|
340 |
+
'embedding_api_key': embedding_api_key,
|
341 |
+
'chunk_size': chunk_size,
|
342 |
+
'overlap': overlap
|
343 |
+
}
|
344 |
}
|
345 |
|
346 |
except Exception as e:
|
|
|
534 |
return session_path
|
535 |
|
536 |
|
537 |
+
import chardet
|
538 |
+
import logging
|
539 |
+
|
540 |
def safe_read_file(file_path):
    """Read a file as text, trying several encodings until one looks plausible.

    The raw bytes are read once. The encoding suggested by ``chardet`` (if any)
    is tried first, followed by a fixed list of common encodings. A decoding is
    accepted when more than 95% of its characters are printable or whitespace.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text (``""`` for an empty file). On failure no exception is
        raised; a human-readable error message string is returned instead
        ("File not found: ...", "An error occurred while reading the file: ...",
        or "Unable to decode the file ...").
    """
    encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252', 'utf-8-sig']

    logging.info(f"Attempting to read file: {file_path}")

    try:
        with open(file_path, 'rb') as file:
            raw_data = file.read()
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
        return f"File not found: {file_path}"
    except Exception as e:
        logging.error(f"An error occurred while reading the file: {e}")
        return f"An error occurred while reading the file: {e}"

    if not raw_data:
        logging.warning(f"File is empty: {file_path}")
        return ""

    # Use chardet to detect the encoding. Detection is only a hint: if it
    # fails for any reason, the static encoding list is still tried.
    try:
        detected_encoding = chardet.detect(raw_data)['encoding']
    except Exception as e:
        logging.warning(f"Encoding detection failed, using default encodings: {e}")
        detected_encoding = None
    if detected_encoding is not None:
        encodings.insert(0, detected_encoding)
        logging.info(f"Detected encoding: {detected_encoding}")

    for encoding in encodings:
        try:
            decoded_content = raw_data.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the detected encoding name is unknown to Python.
            logging.debug(f"Failed to decode with {encoding}")
            continue

        # A payload consisting only of a byte-order mark decodes to an empty
        # string; treat it like an empty file instead of dividing by zero.
        if not decoded_content:
            logging.info(f"Successfully decoded file with encoding: {encoding}")
            return decoded_content

        # Check if the content is mostly readable. Line breaks and tabs are not
        # "printable" per str.isprintable(), so whitespace is counted as well;
        # otherwise files made of short lines would be rejected.
        readable = sum(c.isprintable() or c.isspace() for c in decoded_content)
        if readable / len(decoded_content) > 0.95:
            logging.info(f"Successfully decoded file with encoding: {encoding}")
            return decoded_content

    # If all decoding attempts fail, return the error message
    logging.error(f"Unable to decode the file {file_path}")
    return f"Unable to decode the file {file_path}"
|
579 |
+
|
580 |
|
581 |
#
|
582 |
# End of Files-saving Function Definitions
|