Spaces:

oceansweep
/

tldw

Running

App Files Files Community

oceansweep committed on Oct 13

Commit

c8ebc55

•

1 Parent(s): f71d2e6

Upload 11 files

Browse files

Files changed (7) hide show

App_Function_Libraries/Books/Book_Ingestion_Lib.py +394 -60
App_Function_Libraries/Character_Chat/Character_Chat_Lib.py +541 -0
App_Function_Libraries/Character_Chat/__init__.py +0 -0
App_Function_Libraries/DB/Character_Chat_DB.py +701 -684
App_Function_Libraries/DB/DB_Manager.py +4 -1
App_Function_Libraries/DB/SQLite_DB.py +23 -9
App_Function_Libraries/Utils/Utils.py +44 -5

App_Function_Libraries/Books/Book_Ingestion_Lib.py CHANGED Viewed

@@ -14,35 +14,290 @@
 # Import necessary libraries
 import os
 import re
 from datetime import datetime
 import logging
 import ebooklib
 from bs4 import BeautifulSoup
 from ebooklib import epub
 #
 # Import Local
-from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords
 #
 #######################################################################################################################
 # Function Definitions
 #
 def read_epub(file_path):
-    """Read and extract text from an EPUB file."""
-    book = epub.read_epub(file_path)
-    chapters = []
-    for item in book.get_items():
-        if item.get_type() == ebooklib.ITEM_DOCUMENT:
-            chapters.append(item.get_content())
-    text = ""
-    for html_content in chapters:
-        soup = BeautifulSoup(html_content, 'html.parser')
-        text += soup.get_text() + "\n\n"
-    return text
 # Ingest a text file into the database with Title/Author/Keywords
@@ -57,15 +312,28 @@ def extract_epub_metadata(content):
 def ingest_text_file(file_path, title=None, author=None, keywords=None):
     try:
         with open(file_path, 'r', encoding='utf-8') as file:
             content = file.read()
         # Check if it's a converted epub and extract metadata if so
-        if 'epub_converted' in (keywords or ''):
             extracted_title, extracted_author = extract_epub_metadata(content)
             title = title or extracted_title
             author = author or extracted_author
         # If title is still not provided, use the filename without extension
         if not title:
@@ -95,6 +363,7 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
             ingestion_date=datetime.now().strftime('%Y-%m-%d')
         )
         return f"Text file '{title}' by {author} ingested successfully."
     except Exception as e:
         logging.error(f"Error ingesting text file: {str(e)}")
@@ -102,68 +371,133 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
 def ingest_folder(folder_path, keywords=None):
     results = []
-    for filename in os.listdir(folder_path):
-        if filename.lower().endswith('.txt'):
-            file_path = os.path.join(folder_path, filename)
-            result = ingest_text_file(file_path, keywords=keywords)
-            results.append(result)
 def epub_to_markdown(epub_path):
-    book = epub.read_epub(epub_path)
-    markdown_content = "# Table of Contents\n\n"
-    chapters = []
-    # Extract and format the table of contents
-    toc = book.toc
-    for item in toc:
-        if isinstance(item, tuple):
-            section, children = item
-            level = 1
-            markdown_content += format_toc_item(section, level)
-            for child in children:
-                markdown_content += format_toc_item(child, level + 1)
-        else:
-            markdown_content += format_toc_item(item, 1)
-    markdown_content += "\n---\n\n"
-    # Process each chapter
-    for item in book.get_items():
-        if item.get_type() == ebooklib.ITEM_DOCUMENT:
-            chapter_content = item.get_content().decode('utf-8')
-            soup = BeautifulSoup(chapter_content, 'html.parser')
-            # Extract chapter title
-            title = soup.find(['h1', 'h2', 'h3'])
-            if title:
-                chapter_title = title.get_text()
-                markdown_content += f"# {chapter_title}\n\n"
-            # Process chapter content
-            for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
-                if elem.name.startswith('h'):
-                    level = int(elem.name[1])
-                    markdown_content += f"{'#' * level} {elem.get_text()}\n\n"
-                elif elem.name == 'p':
-                    markdown_content += f"{elem.get_text()}\n\n"
-                elif elem.name in ['ul', 'ol']:
-                    for li in elem.find_all('li'):
-                        markdown_content += f"- {li.get_text()}\n"
-                    markdown_content += "\n"
-            markdown_content += "---\n\n"
-    return markdown_content
-def format_toc_item(item, level):
-    return f"{'  ' * (level - 1)}- [{item.title}](#{slugify(item.title)})\n"
 def slugify(text):
-    return re.sub(r'[\W_]+', '-', text.lower())
 #
 # End of Function Definitions

 # Import necessary libraries
 import os
 import re
+import tempfile
+import zipfile
 from datetime import datetime
 import logging
 import ebooklib
 from bs4 import BeautifulSoup
 from ebooklib import epub
+from App_Function_Libraries.Chunk_Lib import chunk_ebook_by_chapters
 #
 # Import Local
+from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords, add_media_to_database
+from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
 #
 #######################################################################################################################
 # Function Definitions
 #
def import_epub(file_path, title=None, author=None, keywords=None, custom_prompt=None, system_prompt=None, summary=None,
                auto_summarize=False, api_name=None, api_key=None, chunk_options=None, custom_chapter_pattern=None):
    """
    Imports an EPUB file, extracts its content, chunks it, optionally summarizes it, and adds it to the database.
    Parameters:
        - file_path (str): Path to the EPUB file.
        - title (str, optional): Title of the book. Falls back to extracted metadata, then the file name.
        - author (str, optional): Author of the book. Falls back to extracted metadata, then "Unknown".
        - keywords (str, optional): Comma-separated keywords for the book.
        - custom_prompt (str, optional): Custom user prompt for summarization.
        - system_prompt (str, optional): System message passed to the summarizer.
        - summary (str, optional): Predefined summary of the book. When given it is stored as-is.
        - auto_summarize (bool, optional): Whether to auto-summarize the chunks.
        - api_name (str, optional): API name for summarization.
        - api_key (str, optional): API key for summarization.
        - chunk_options (dict, optional): Options for chunking. The caller's dict is not modified.
        - custom_chapter_pattern (str, optional): Custom regex pattern for chapter detection.
    Returns:
        - str: Status message indicating success or failure. Errors are reported in the message, not raised.
    """
    try:
        logging.info(f"Importing EPUB file from {file_path}")
        # Convert EPUB to Markdown
        markdown_content = epub_to_markdown(file_path)
        logging.debug("Converted EPUB to Markdown.")
        # Extract metadata if not provided
        if not title or not author:
            extracted_title, extracted_author = extract_epub_metadata(markdown_content)
            title = title or extracted_title or os.path.splitext(os.path.basename(file_path))[0]
            author = author or extracted_author or "Unknown"
            logging.debug(f"Extracted metadata - Title: {title}, Author: {author}")
        # Process keywords; blank entries (e.g. from "a,,b" or a trailing comma) are dropped
        keyword_list = [kw.strip() for kw in keywords.split(',') if kw.strip()] if keywords else []
        logging.debug(f"Keywords: {keyword_list}")
        # Set default chunk options if not provided
        if chunk_options is None:
            chunk_options = {
                'method': 'chapter',
                'max_size': 500,
                'overlap': 200,
                'custom_chapter_pattern': custom_chapter_pattern
            }
        else:
            # Work on a copy so the caller's dict (reused across files by the ZIP importer) is left untouched
            chunk_options = dict(chunk_options)
            chunk_options.setdefault('method', 'chapter')
            # An explicit None in the options must not hide a pattern passed as an argument
            if chunk_options.get('custom_chapter_pattern') is None:
                chunk_options['custom_chapter_pattern'] = custom_chapter_pattern
        # Chunk the content by chapters
        chunks = chunk_ebook_by_chapters(markdown_content, chunk_options)
        logging.info(f"Total chunks created: {len(chunks)}")
        if chunks:
            logging.debug(f"Structure of first chunk: {chunks[0].keys()}")
        # Handle summarization if enabled
        if auto_summarize and api_name and api_key:
            logging.info("Auto-summarization is enabled.")
            summarized_chunks = []
            chunk_summaries = []
            for chunk in chunks:
                # Same lookup as the segment builder below: a chunk may carry 'text' or 'content'
                chunk_text = chunk.get('text') or chunk.get('content', '')
                if chunk_text:
                    summary_text = perform_summarization(api_name, chunk_text, custom_prompt, api_key, recursive_summarization=False, temp=None, system_message=system_prompt)
                    chunk.setdefault('metadata', {})['summary'] = summary_text
                    summarized_chunks.append(chunk)
                    if isinstance(summary_text, str) and summary_text.strip():
                        chunk_summaries.append(summary_text.strip())
            chunks = summarized_chunks
            logging.info("Summarization of chunks completed.")
            # Only segment text is persisted, so without this the generated summaries would be lost
            if not summary:
                summary = "\n\n".join(chunk_summaries) if chunk_summaries else "No summary provided."
        else:
            if auto_summarize:
                logging.warning("Auto-summarization requested but api_name/api_key is missing; skipping.")
            # If not summarizing, set a default summary or use provided summary
            if summary:
                logging.debug("Using provided summary.")
            else:
                summary = "No summary provided."
        # Create info_dict
        info_dict = {
            'title': title,
            'uploader': author,
            'ingestion_date': datetime.now().strftime('%Y-%m-%d')
        }
        # Prepare segments for database
        segments = [{'Text': chunk.get('text', chunk.get('content', ''))} for chunk in chunks]
        logging.debug(f"Prepared segments for database. Number of segments: {len(segments)}")
        # Add to database
        result = add_media_to_database(
            url=file_path,
            info_dict=info_dict,
            segments=segments,
            summary=summary,
            keywords=keyword_list,
            custom_prompt_input=custom_prompt,
            whisper_model="Imported",
            media_type="ebook",
            overwrite=False
        )
        logging.info(f"Ebook '{title}' by {author} imported successfully. Database result: {result}")
        return f"Ebook '{title}' by {author} imported successfully. Database result: {result}"
    except Exception as e:
        logging.exception(f"Error importing ebook: {str(e)}")
        return f"Error importing ebook: {str(e)}"
+# FIXME
def process_zip_file(zip_file, title, author, keywords, custom_prompt, system_prompt, summary, auto_summarize, api_name, api_key, chunk_options):
    """
    Processes a ZIP file containing multiple EPUB files and imports each one.
    Parameters:
        - zip_file (str, path-like or file-like object): The ZIP file to process. File-like objects are
          located through their `name` (or `path`) attribute.
        - title (str): Title passed to every imported book.
        - author (str): Author name for the books.
        - keywords (str): Comma-separated keywords.
        - custom_prompt (str): Custom user prompt for summarization.
        - system_prompt (str): System message forwarded to the summarizer.
        - summary (str): Predefined summary forwarded to each import.
        - auto_summarize (bool): Whether to auto-summarize the chunks.
        - api_name (str): API name for summarization.
        - api_key (str): API key for summarization.
        - chunk_options (dict): Options for chunking.
    Returns:
        - str: Combined status messages for all EPUB files in the ZIP (empty when the archive holds none),
          or an error message if the archive itself could not be processed.
    """
    results = []
    try:
        # Resolve the archive location; plain paths are accepted as well as upload objects
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = os.fspath(zip_file)
        elif hasattr(zip_file, 'name'):
            zip_path = zip_file.name
        else:
            zip_path = zip_file.path
        with tempfile.TemporaryDirectory() as temp_dir:
            logging.info(f"Extracting ZIP file {zip_path} to temporary directory {temp_dir}")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)
            # Archives frequently wrap their content in a folder, so search the whole extracted tree
            epub_paths = []
            for root, _dirs, files in os.walk(temp_dir):
                for filename in files:
                    if filename.lower().endswith('.epub'):
                        epub_paths.append(os.path.join(root, filename))
            for file_path in sorted(epub_paths):
                filename = os.path.relpath(file_path, temp_dir)
                logging.info(f"Processing EPUB file {filename} from ZIP.")
                result = import_epub(
                    file_path=file_path,
                    title=title,
                    author=author,
                    keywords=keywords,
                    custom_prompt=custom_prompt,
                    system_prompt=system_prompt,
                    summary=summary,
                    auto_summarize=auto_summarize,
                    api_name=api_name,
                    api_key=api_key,
                    chunk_options=chunk_options,
                    custom_chapter_pattern=chunk_options.get('custom_chapter_pattern') if chunk_options else None
                )
                results.append(f"File: {filename} - {result}")
            logging.info("Completed processing all EPUB files in the ZIP.")
    except Exception as e:
        logging.exception(f"Error processing ZIP file: {str(e)}")
        return f"Error processing ZIP file: {str(e)}"
    return "\n".join(results)
def import_file_handler(file, title, author, keywords, system_prompt, custom_prompt, auto_summarize, api_name,
                        api_key, max_chunk_size, chunk_overlap, custom_chapter_pattern):
    """
    Dispatches an uploaded file to the matching importer and returns a user-facing status message.
    Parameters:
        - file (file-like object): Uploaded file; its `name` attribute is used as the path on disk.
        - title (str): Title for the imported book(s).
        - author (str): Author for the imported book(s).
        - keywords (str): Comma-separated keywords.
        - system_prompt (str): System message for summarization.
        - custom_prompt (str): Custom user prompt for summarization.
        - auto_summarize (bool): Whether to auto-summarize the chunks.
        - api_name (str): API name for summarization.
        - api_key (str): API key for summarization.
        - max_chunk_size (int, float or str): Maximum chunk size; blank or unsupported values use 4000.
        - chunk_overlap (int, float or str): Chunk overlap; blank or unsupported values use 0.
        - custom_chapter_pattern (str): Custom regex pattern for chapter detection.
    Returns:
        - str: Status message describing the outcome. Errors are reported in the message, not raised.
    """
    def _coerce_int(value, default):
        # Normalizes a numeric UI value; non-numeric strings raise ValueError for the handler below.
        if isinstance(value, int):
            return value
        if isinstance(value, str):
            stripped = value.strip()
            return int(stripped) if stripped else default
        if isinstance(value, float):
            # Numeric widgets commonly deliver floats such as 4000.0; NaN/inf fall back to the default
            try:
                return int(value)
            except (ValueError, OverflowError):
                return default
        return default

    try:
        max_chunk_size = _coerce_int(max_chunk_size, 4000)
        chunk_overlap = _coerce_int(chunk_overlap, 0)
        chunk_options = {
            'method': 'chapter',
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
        }
        if file is None:
            return "No file uploaded."
        file_path = file.name
        if not os.path.exists(file_path):
            return "Uploaded file not found."
        lowered_path = file_path.lower()
        if lowered_path.endswith('.epub'):
            status = import_epub(
                file_path,
                title,
                author,
                keywords,
                custom_prompt=custom_prompt,
                system_prompt=system_prompt,
                summary=None,
                auto_summarize=auto_summarize,
                api_name=api_name,
                api_key=api_key,
                chunk_options=chunk_options,
                custom_chapter_pattern=custom_chapter_pattern
            )
            # import_epub reports failures through its return string; do not label those as success
            if isinstance(status, str) and status.startswith("Error"):
                return f"❌ {status}"
            return f"📚 EPUB Imported Successfully:\n{status}"
        elif lowered_path.endswith('.zip'):
            status = process_zip_file(
                zip_file=file,
                title=title,
                author=author,
                keywords=keywords,
                custom_prompt=custom_prompt,
                system_prompt=system_prompt,
                summary=None,  # Let the library handle summarization
                auto_summarize=auto_summarize,
                api_name=api_name,
                api_key=api_key,
                chunk_options=chunk_options
            )
            # An archive-level failure comes back as a single error string
            if isinstance(status, str) and status.startswith("Error processing ZIP file"):
                return f"❌ {status}"
            return f"📦 ZIP Processed Successfully:\n{status}"
        elif lowered_path.endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
            file_type = file_path.split('.')[-1].upper()
            return f"{file_type} file import is not yet supported."
        else:
            return "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."
    except ValueError as ve:
        logging.exception(f"Error parsing input values: {str(ve)}")
        return "❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
    except Exception as e:
        logging.exception(f"Error during file import: {str(e)}")
        return f"❌ Error during import: {str(e)}"
 def read_epub(file_path):
+    """
+    Reads and extracts text from an EPUB file.
+    Parameters:
+        - file_path (str): Path to the EPUB file.
+    Returns:
+        - str: Extracted text content from the EPUB.
+    """
+    try:
+        logging.info(f"Reading EPUB file from {file_path}")
+        book = epub.read_epub(file_path)
+        chapters = []
+        for item in book.get_items():
+            if item.get_type() == ebooklib.ITEM_DOCUMENT:
+                chapters.append(item.get_content())
+        text = ""
+        for html_content in chapters:
+            soup = BeautifulSoup(html_content, 'html.parser')
+            text += soup.get_text(separator='\n\n') + "\n\n"
+        logging.debug("EPUB content extraction completed.")
+        return text
+    except Exception as e:
+        logging.exception(f"Error reading EPUB file: {str(e)}")
+        raise
 # Ingest a text file into the database with Title/Author/Keywords
 def ingest_text_file(file_path, title=None, author=None, keywords=None):
+    """
+    Ingests a plain text file into the database with optional metadata.
+    Parameters:
+        - file_path (str): Path to the text file.
+        - title (str, optional): Title of the document.
+        - author (str, optional): Author of the document.
+        - keywords (str, optional): Comma-separated keywords.
+    Returns:
+        - str: Status message indicating success or failure.
+    """
     try:
         with open(file_path, 'r', encoding='utf-8') as file:
             content = file.read()
         # Check if it's a converted epub and extract metadata if so
+        if 'epub_converted' in (keywords or '').lower():
             extracted_title, extracted_author = extract_epub_metadata(content)
             title = title or extracted_title
             author = author or extracted_author
+            logging.debug(f"Extracted metadata for converted EPUB - Title: {title}, Author: {author}")
         # If title is still not provided, use the filename without extension
         if not title:
             ingestion_date=datetime.now().strftime('%Y-%m-%d')
         )
+        logging.info(f"Text file '{title}' by {author} ingested successfully.")
         return f"Text file '{title}' by {author} ingested successfully."
     except Exception as e:
         logging.error(f"Error ingesting text file: {str(e)}")
 def ingest_folder(folder_path, keywords=None):
+    """
+    Ingests all text files within a specified folder.
+    Parameters:
+        - folder_path (str): Path to the folder containing text files.
+        - keywords (str, optional): Comma-separated keywords to add to each file.
+    Returns:
+        - str: Combined status messages for all ingested text files.
+    """
     results = []
+    try:
+        logging.info(f"Ingesting all text files from folder {folder_path}")
+        for filename in os.listdir(folder_path):
+            if filename.lower().endswith('.txt'):
+                file_path = os.path.join(folder_path, filename)
+                result = ingest_text_file(file_path, keywords=keywords)
+                results.append(result)
+        logging.info("Completed ingestion of all text files in the folder.")
+    except Exception as e:
+        logging.exception(f"Error ingesting folder: {str(e)}")
+        return f"Error ingesting folder: {str(e)}"
+    return "\n".join(results)
 def epub_to_markdown(epub_path):
+    """
+    Converts an EPUB file to Markdown format, including the table of contents and chapter contents.
+    Parameters:
+        - epub_path (str): Path to the EPUB file.
+    Returns:
+        - str: Markdown-formatted content of the EPUB.
+    """
+    try:
+        logging.info(f"Converting EPUB to Markdown from {epub_path}")
+        book = epub.read_epub(epub_path)
+        markdown_content = "# Table of Contents\n\n"
+        chapters = []
+        # Extract and format the table of contents
+        toc = book.toc
+        for item in toc:
+            if isinstance(item, tuple):
+                section, children = item
+                level = 1
+                markdown_content += format_toc_item(section, level)
+                for child in children:
+                    markdown_content += format_toc_item(child, level + 1)
+            else:
+                markdown_content += format_toc_item(item, 1)
+        markdown_content += "\n---\n\n"
+        # Process each chapter
+        for item in book.get_items():
+            if item.get_type() == ebooklib.ITEM_DOCUMENT:
+                chapter_content = item.get_content().decode('utf-8')
+                soup = BeautifulSoup(chapter_content, 'html.parser')
+                # Extract chapter title
+                title = soup.find(['h1', 'h2', 'h3'])
+                if title:
+                    chapter_title = title.get_text()
+                    markdown_content += f"# {chapter_title}\n\n"
+                # Process chapter content
+                for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
+                    if elem.name.startswith('h'):
+                        level = int(elem.name[1])
+                        markdown_content += f"{'#' * level} {elem.get_text()}\n\n"
+                    elif elem.name == 'p':
+                        markdown_content += f"{elem.get_text()}\n\n"
+                    elif elem.name in ['ul', 'ol']:
+                        for li in elem.find_all('li'):
+                            prefix = '-' if elem.name == 'ul' else '1.'
+                            markdown_content += f"{prefix} {li.get_text()}\n"
+                        markdown_content += "\n"
+                markdown_content += "---\n\n"
+        logging.debug("EPUB to Markdown conversion completed.")
+        return markdown_content
+    except Exception as e:
+        logging.exception(f"Error converting EPUB to Markdown: {str(e)}")
+        raise
def format_toc_item(item, level):
    """
    Renders one table-of-contents entry as an indented Markdown list item linking to its slug.
    Parameters:
        - item (epub.Link or epub.Section): TOC item; any other object is rendered via str().
        - level (int): Nesting level; each level beyond the first adds two spaces of indentation.
    Returns:
        - str: Markdown-formatted TOC line, or an empty string if formatting fails.
    """
    try:
        # Links and sections both expose their label as .title
        label = item.title if isinstance(item, (epub.Link, epub.Section)) else str(item)
        indent = '  ' * (level - 1)
        return f"{indent}- [{label}](#{slugify(label)})\n"
    except Exception as e:
        logging.exception(f"Error formatting TOC item: {str(e)}")
        return ""
 def slugify(text):
+    """
+    Converts a string into a slug suitable for Markdown links.
+    Parameters:
+        - text (str): The text to slugify.
+    Returns:
+        - str: Slugified text.
+    """
+    return re.sub(r'[\W_]+', '-', text.lower()).strip('-')
 #
 # End of Function Definitions

App_Function_Libraries/Character_Chat/Character_Chat_Lib.py ADDED Viewed

	@@ -0,0 +1,541 @@

+# Character_Chat_Lib.py
+# Description: Functions for character chat cards.
+#
+# Imports
+import json
+import logging
+import io
+import base64
+from typing import Dict, Any, Optional, List, Tuple
+#
+# External Imports
+from PIL import Image
+#
+# Local imports
+from App_Function_Libraries.DB.DB_Manager import get_character_card_by_id, get_character_chat_by_id
+#
+# Constants
+####################################################################################################
+#
+# Functions
+# Using https://github.com/malfoyslastname/character-card-spec-v2 as the standard for v2 character cards
+#################################################################################
+#
+# Placeholder functions:
def replace_placeholders(text: str, char_name: str, user_name: str) -> str:
    """
    Replace placeholders in the given text with appropriate values.
    Args:
        text (str): The text containing placeholders. Values that are not strings (for example a
            missing card field stored as None) are returned unchanged.
        char_name (str): The name of the character. If None, '{{char}}' is left in place.
        user_name (str): The name of the user. Falls back to "User" when empty or None.
    Returns:
        str: The text with placeholders replaced.
    """
    if not isinstance(text, str):
        # Nothing to substitute; raising here would abort the caller's whole character load
        return text
    if not user_name:
        user_name = "User"  # Same default as replace_user_placeholder
    replacements = {
        '{{char}}': char_name,
        '{{user}}': user_name,
        '{{random_user}}': user_name  # Assuming random_user is the same as user for simplicity
    }
    for placeholder, value in replacements.items():
        if value is None:
            continue  # str.replace rejects None; keep the placeholder visible instead
        text = text.replace(placeholder, str(value))
    return text
def replace_user_placeholder(history, user_name):
    """
    Substitutes the user's name for every '{{user}}' placeholder in a chat history.
    Args:
        history (list): The current chat history as a list of (user_message, bot_message) pairs.
        user_name (str): The name entered by the user; "User" is used when it is empty.
    Returns:
        list: New chat history, as a list of tuples, with placeholders replaced.
    """
    display_name = user_name if user_name else "User"  # Default name if none provided

    def _swap(message):
        # Empty or missing messages are passed through untouched
        return message.replace("{{user}}", display_name) if message else message

    return [(_swap(user_msg), _swap(bot_msg)) for user_msg, bot_msg in history]
+#
+# End of Placeholder functions
+#################################################################################
+#################################################################################
+#
+# Character loading and parsing functions
def extract_character_id(choice: str) -> int:
    """
    Extract the character ID from the dropdown selection string.
    Args:
        choice (str): Selection text ending in '(ID: <number>)', e.g. 'Alice (ID: 12)'.
    Returns:
        int: The numeric character ID.
    Raises:
        IndexError: If the text contains no '(ID: ' marker.
        ValueError: If the text after the marker is not an integer.
    """
    # Split on the LAST marker so a character name that itself contains "(ID: " cannot shadow the real ID
    id_part = choice.rsplit('(ID: ', 1)[1]
    # Tolerate surrounding whitespace before dropping the closing parenthesis
    return int(id_part.strip().rstrip(')'))
def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Thin pass-through to load_character_and_image for an already extracted character ID.
    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, forwarded unchanged.
    Returns:
        The (character data, chat history, image) triple produced by load_character_and_image.
    """
    loaded = load_character_and_image(character_id, user_name)
    card, history, portrait = loaded
    return card, history, portrait
def parse_character_book(book_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the character book section of a V2 character card.
    Args:
        book_data (Dict[str, Any]): The raw character book data from the character card.
    Returns:
        Dict[str, Any]: The structured book, with optional fields filled with defaults and one
        normalized dict per entry under 'entries'.
    Raises:
        KeyError: If an entry lacks 'keys', 'content', 'enabled' or 'insertion_order'.
    """
    def _normalize_entry(raw: Dict[str, Any]) -> Dict[str, Any]:
        # The four subscripted fields are mandatory here; the rest fall back to defaults
        return {
            'keys': raw['keys'],
            'content': raw['content'],
            'extensions': raw.get('extensions', {}),
            'enabled': raw['enabled'],
            'insertion_order': raw['insertion_order'],
            'case_sensitive': raw.get('case_sensitive', False),
            'name': raw.get('name', ''),
            'priority': raw.get('priority'),
            'id': raw.get('id'),
            'comment': raw.get('comment', ''),
            'selective': raw.get('selective', False),
            'secondary_keys': raw.get('secondary_keys', []),
            'constant': raw.get('constant', False),
            'position': raw.get('position')
        }

    return {
        'name': book_data.get('name', ''),
        'description': book_data.get('description', ''),
        'scan_depth': book_data.get('scan_depth'),
        'token_budget': book_data.get('token_budget'),
        'recursive_scanning': book_data.get('recursive_scanning', False),
        'extensions': book_data.get('extensions', {}),
        'entries': [_normalize_entry(raw_entry) for raw_entry in book_data.get('entries', [])]
    }
def _decode_character_image(char_data: Dict[str, Any]) -> Optional[Image.Image]:
    """Decode the base64 'image' field of a character card into an RGBA image; None if absent or invalid."""
    encoded = char_data.get('image')
    if not encoded:
        return None
    try:
        image_data = base64.b64decode(encoded)
        return Image.open(io.BytesIO(image_data)).convert("RGBA")
    except Exception as e:
        # A broken portrait must not prevent the character itself from loading
        logging.error(f"Error processing image for character '{char_data.get('name')}': {e}")
        return None


def _apply_character_placeholders(char_data: Dict[str, Any], user_name: str) -> None:
    """Replace template placeholders, in place, in the text fields of a character card."""
    effective_user = user_name or "User"  # Same default as replace_user_placeholder
    for field in ['first_mes', 'mes_example', 'scenario', 'description', 'personality']:
        value = char_data.get(field)
        # Fields may be present but empty/None; only real strings can carry placeholders
        if isinstance(value, str):
            char_data[field] = replace_placeholders(value, char_data['name'], effective_user)


def load_character_and_image(character_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Load a character and its associated image based on the character ID.
    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, used for placeholder replacement.
    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
        A tuple containing the character data, chat history, and character image (if available).
        The history holds the character's first message, or is empty when that message is empty.
        On any failure the result is (None, [], None).
    """
    try:
        char_data = get_character_card_by_id(character_id)
        if not char_data:
            logging.warning(f"No character data found for ID: {character_id}")
            return None, [], None
        # Replace placeholders in character data (this already covers first_mes)
        _apply_character_placeholders(char_data, user_name)
        first_mes = char_data.get('first_mes')
        if first_mes is None:
            # No greeting stored on the card: fall back to a generic one
            first_mes = "Hello! I'm ready to chat."
        chat_history = [(None, first_mes)] if first_mes else []
        img = _decode_character_image(char_data)
        return char_data, chat_history, img
    except Exception as e:
        logging.error(f"Error in load_character_and_image: {e}")
        return None, [], None


def load_chat_and_character(chat_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
    """
    Load a chat and its associated character, including the character image and process templates.
    Args:
        chat_id (int): The ID of the chat to load.
        user_name (str): The name of the user.
    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
        A tuple containing the character data, processed chat history, and character image (if available).
        If the chat exists but its character does not, the stored history is returned unprocessed with
        no character data. On any failure the result is (None, [], None).
    """
    try:
        # Load the chat
        chat = get_character_chat_by_id(chat_id)
        if not chat:
            logging.warning(f"No chat found with ID: {chat_id}")
            return None, [], None
        # Load the associated character
        character_id = chat['character_id']
        char_data = get_character_card_by_id(character_id)
        if not char_data:
            logging.warning(f"No character found for chat ID: {chat_id}")
            return None, chat['chat_history'], None
        # Process the chat history
        processed_history = process_chat_history(chat['chat_history'], char_data['name'], user_name)
        # Load the character image
        img = _decode_character_image(char_data)
        # Process character data templates
        _apply_character_placeholders(char_data, user_name)
        return char_data, processed_history, img
    except Exception as e:
        logging.error(f"Error in load_chat_and_character: {e}")
        return None, [], None
def extract_json_from_image(image_file):
    """
    Extract embedded character-card JSON from an image.

    The base64-encoded 'chara' metadata entry is tried first. If it is missing or cannot be decoded,
    the original file bytes are scanned for the outermost '{' ... '}' span and that span is returned
    when it parses as JSON.
    Args:
        image_file: A path, or a file-like object for the image (its `name` is used only for logging).
    Returns:
        Optional[str]: The JSON text, or None if none was found or an error occurred.
    """
    source_name = getattr(image_file, 'name', image_file)
    logging.debug(f"Attempting to extract JSON from image: {source_name}")
    try:
        with Image.open(image_file) as img:
            logging.debug("Image opened successfully")
            chara_content = img.info.get('chara')
        if chara_content is not None:
            logging.debug("Found 'chara' in image metadata")
            logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
            try:
                decoded_content = base64.b64decode(chara_content).decode('utf-8')
                logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
                return decoded_content
            except Exception as e:
                logging.error(f"Error decoding base64 content: {e}")
                logging.warning("'chara' metadata could not be decoded, attempting to find JSON data in image bytes")
        else:
            logging.warning("'chara' not found in metadata, attempting to find JSON data in image bytes")
        # Scan the file as stored: a copy re-encoded through Pillow contains freshly written image
        # data, not the text that was embedded in the original file
        if hasattr(image_file, 'read'):
            image_file.seek(0)
            raw_bytes = image_file.read()
        else:
            with open(image_file, 'rb') as handle:
                raw_bytes = handle.read()
        # Search for a JSON-like structure in the raw bytes
        json_start = raw_bytes.find(b'{')
        json_end = raw_bytes.rfind(b'}')
        if json_start != -1 and json_end > json_start:
            try:
                # Decode as UTF-8 so non-ASCII characters in the card survive intact
                possible_json = raw_bytes[json_start:json_end + 1].decode('utf-8')
                json.loads(possible_json)
                logging.debug("Found JSON data in image bytes")
                return possible_json
            except (UnicodeDecodeError, json.JSONDecodeError):
                logging.debug("No valid JSON found in image bytes")
        logging.warning("No JSON data found in the image")
    except Exception as e:
        logging.error(f"Error extracting JSON from image: {e}")
    return None
+def load_chat_history(file):
+    """
+    Load an exported chat from a binary file-like object containing UTF-8 JSON.
+    Args:
+        file: Object whose read() returns bytes holding a JSON object. The history is taken from
+            'history' (or 'messages'), the character name from 'character' (or 'character_name').
+    Returns:
+        Tuple: (history, character_name), or (None, None) if either is missing/empty or on any error.
+    """
+    try:
+        payload = json.loads(file.read().decode('utf-8'))
+        # Prefer the first key of each pair; fall back to the alternative export key.
+        messages = payload.get('history') or payload.get('messages')
+        name = payload.get('character') or payload.get('character_name')
+        if messages and name:
+            return messages, name
+        logging.error("Chat history or character name missing in the imported file.")
+    except Exception as e:
+        logging.error(f"Error loading chat history: {e}")
+    return None, None
+def process_chat_history(chat_history: List[Tuple[str, str]], char_name: str, user_name: str) -> List[Tuple[str, str]]:
+    """
+    Return a copy of the chat history with placeholders replaced in every non-empty message.
+    Args:
+        chat_history (List[Tuple[str, str]]): Pairs of (user message, character message).
+        char_name (str): The name of the character.
+        user_name (str): The name of the user.
+    Returns:
+        List[Tuple[str, str]]: New list of pairs; empty/None messages are passed through unchanged.
+    """
+    def _fill(message):
+        # Falsy messages (None, "") are kept as they are and never sent through replace_placeholders.
+        return replace_placeholders(message, char_name, user_name) if message else message
+    return [(_fill(user_turn), _fill(char_turn)) for user_turn, char_turn in chat_history]
+def validate_character_book(book_data):
+    """
+    Validate the 'character_book' field in the character card.
+    Args:
+        book_data (dict): The character book data. Any other type is reported as invalid
+            instead of raising.
+    Returns:
+        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
+    """
+    validation_messages = []
+    # A non-dict value cannot be inspected field by field; report it rather than crash.
+    if not isinstance(book_data, dict):
+        validation_messages.append("Field 'character_book' must be a dictionary.")
+        return False, validation_messages
+    # Optional fields with expected types
+    optional_fields = {
+        'name': str,
+        'description': str,
+        'scan_depth': (int, float),
+        'token_budget': (int, float),
+        'recursive_scanning': bool,
+        'extensions': dict,
+        'entries': list
+    }
+    for field, expected_type in optional_fields.items():
+        if field in book_data and not isinstance(book_data[field], expected_type):
+            validation_messages.append(f"Field 'character_book.{field}' must be of type '{expected_type}'.")
+    # 'entries' is required; without a list there is nothing further to validate
+    if not isinstance(book_data.get('entries'), list):
+        validation_messages.append("Field 'character_book.entries' is required and must be a list.")
+        return False, validation_messages
+    # Validate each entry; entry_ids is shared across entries so duplicate ids can be detected
+    entry_ids = set()
+    for idx, entry in enumerate(book_data['entries']):
+        is_valid_entry, entry_messages = validate_character_book_entry(entry, idx, entry_ids)
+        if not is_valid_entry:
+            validation_messages.extend(entry_messages)
+    return not validation_messages, validation_messages
+def validate_character_book_entry(entry, idx, entry_ids):
+    """
+    Validate an entry in the 'character_book.entries' list.
+    Args:
+        entry (dict): The entry data. Any other type is reported as invalid instead of raising.
+        idx (int): The index of the entry in the list (used to prefix every message).
+        entry_ids (set): IDs seen so far; a new, not-yet-seen 'id' is added to it as a side effect.
+    Returns:
+        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
+    """
+    validation_messages = []
+    # A non-dict entry cannot be checked field by field; report it rather than crash on .get().
+    if not isinstance(entry, dict):
+        validation_messages.append(f"Entry {idx}: Must be a dictionary.")
+        return False, validation_messages
+    required_fields = {
+        'keys': list,
+        'content': str,
+        'extensions': dict,
+        'enabled': bool,
+        'insertion_order': (int, float)
+    }
+    for field, expected_type in required_fields.items():
+        if field not in entry:
+            validation_messages.append(f"Entry {idx}: Missing required field '{field}'.")
+        elif not isinstance(entry[field], expected_type):
+            validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")
+        elif field == 'content' and not entry[field].strip():
+            validation_messages.append(f"Entry {idx}: Field 'content' cannot be empty.")
+        elif field == 'keys' and not entry[field]:
+            validation_messages.append(f"Entry {idx}: Field 'keys' cannot be empty.")
+    # Optional fields
+    optional_fields = {
+        'case_sensitive': bool,
+        'name': str,
+        'priority': (int, float),
+        'id': (int, float),
+        'comment': str,
+        'selective': bool,
+        'secondary_keys': list,
+        'constant': bool,
+        'position': str  # Should be 'before_char' or 'after_char'
+    }
+    for field, expected_type in optional_fields.items():
+        if field in entry and not isinstance(entry[field], expected_type):
+            validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")
+    # Validate 'position' value if present
+    if 'position' in entry and entry['position'] not in ['before_char', 'after_char']:
+        validation_messages.append(f"Entry {idx}: Field 'position' must be 'before_char' or 'after_char'.")
+    # Validate 'secondary_keys' if 'selective' is True
+    if entry.get('selective', False):
+        if 'secondary_keys' not in entry or not isinstance(entry['secondary_keys'], list):
+            validation_messages.append(f"Entry {idx}: 'secondary_keys' must be a list when 'selective' is True.")
+        elif not entry['secondary_keys']:
+            validation_messages.append(f"Entry {idx}: 'secondary_keys' cannot be empty when 'selective' is True.")
+    # Validate that 'keys' and 'secondary_keys' contain only non-empty strings
+    for list_field in ('keys', 'secondary_keys'):
+        if list_field in entry and isinstance(entry[list_field], list):
+            for i, key in enumerate(entry[list_field]):
+                if not isinstance(key, str) or not key.strip():
+                    validation_messages.append(f"Entry {idx}: Element {i} in '{list_field}' must be a non-empty string.")
+    # Validate 'id' uniqueness
+    if 'id' in entry:
+        entry_id = entry['id']
+        # Lists and dicts are unhashable and would raise on set membership; their wrong type has
+        # already been reported by the optional-field check above.
+        if not isinstance(entry_id, (list, dict)):
+            if entry_id in entry_ids:
+                validation_messages.append(
+                    f"Entry {idx}: Duplicate 'id' value '{entry_id}'. Each entry 'id' must be unique.")
+            else:
+                entry_ids.add(entry_id)
+    # Validate 'extensions' keys are namespaced (a '/' or '_' in the key is accepted as a namespace)
+    if 'extensions' in entry and isinstance(entry['extensions'], dict):
+        for key in entry['extensions'].keys():
+            if '/' not in key and '_' not in key:
+                validation_messages.append(
+                    f"Entry {idx}: Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")
+    return not validation_messages, validation_messages
+def validate_v2_card(card_data):
+    """
+    Validate a character card according to the V2 specification.
+    Args:
+        card_data (dict): The parsed character card data. Non-dict input is reported as invalid
+            instead of raising.
+    Returns:
+        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
+    """
+    validation_messages = []
+    # The parsed JSON may be any value (list, string, null); only an object can be a card.
+    if not isinstance(card_data, dict):
+        validation_messages.append("Character card must be a dictionary.")
+        return False, validation_messages
+    # Check top-level fields
+    if 'spec' not in card_data:
+        validation_messages.append("Missing 'spec' field.")
+    elif card_data['spec'] != 'chara_card_v2':
+        validation_messages.append(f"Invalid 'spec' value: {card_data['spec']}. Expected 'chara_card_v2'.")
+    if 'spec_version' not in card_data:
+        validation_messages.append("Missing 'spec_version' field.")
+    else:
+        # Ensure 'spec_version' is '2.0' or higher
+        try:
+            spec_version = float(card_data['spec_version'])
+            if spec_version < 2.0:
+                validation_messages.append(
+                    f"'spec_version' must be '2.0' or higher. Found '{card_data['spec_version']}'.")
+        except (TypeError, ValueError):
+            # ValueError: non-numeric string; TypeError: value float() cannot take (None, list, ...)
+            validation_messages.append(
+                f"Invalid 'spec_version' format: {card_data['spec_version']}. Must be a number as a string.")
+    if 'data' not in card_data:
+        validation_messages.append("Missing 'data' field.")
+        return False, validation_messages  # Cannot proceed without 'data' field
+    data = card_data['data']
+    if not isinstance(data, dict):
+        validation_messages.append("Field 'data' must be a dictionary.")
+        return False, validation_messages  # Field checks below need a dict
+    # Required fields in 'data'
+    required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
+    for field in required_fields:
+        if field not in data:
+            validation_messages.append(f"Missing required field in 'data': '{field}'.")
+        elif not isinstance(data[field], str):
+            validation_messages.append(f"Field '{field}' must be a string.")
+        elif not data[field].strip():
+            validation_messages.append(f"Field '{field}' cannot be empty.")
+    # Optional fields with expected types
+    optional_fields = {
+        'creator_notes': str,
+        'system_prompt': str,
+        'post_history_instructions': str,
+        'alternate_greetings': list,
+        'tags': list,
+        'creator': str,
+        'character_version': str,
+        'extensions': dict,
+        'character_book': dict  # If present, should be a dict
+    }
+    for field, expected_type in optional_fields.items():
+        if field in data:
+            if not isinstance(data[field], expected_type):
+                validation_messages.append(f"Field '{field}' must be of type '{expected_type.__name__}'.")
+            elif field == 'extensions':
+                # Validate that extensions keys are properly namespaced
+                for key in data[field].keys():
+                    if '/' not in key and '_' not in key:
+                        validation_messages.append(
+                            f"Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")
+    # 'alternate_greetings' and 'tags', when present as lists, must hold only non-empty strings
+    for list_field in ('alternate_greetings', 'tags'):
+        if list_field in data and isinstance(data[list_field], list):
+            for idx, item in enumerate(data[list_field]):
+                if not isinstance(item, str) or not item.strip():
+                    validation_messages.append(f"Element {idx} in '{list_field}' must be a non-empty string.")
+    # Validate 'character_book' only when it is a dict; a wrong type was already reported by the
+    # optional-field check above and cannot be inspected further.
+    character_book = data.get('character_book')
+    if isinstance(character_book, dict):
+        is_valid_book, book_messages = validate_character_book(character_book)
+        if not is_valid_book:
+            validation_messages.extend(book_messages)
+    return not validation_messages, validation_messages
+#
+# End of File
+####################################################################################################

App_Function_Libraries/Character_Chat/__init__.py ADDED Viewed

File without changes

App_Function_Libraries/DB/Character_Chat_DB.py CHANGED Viewed

@@ -1,684 +1,701 @@
-# character_chat_db.py
-# Database functions for managing character cards and chat histories.
-# #
-# Imports
-import configparser
-import sqlite3
-import json
-import os
-import sys
-from typing import List, Dict, Optional, Tuple, Any, Union
-from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
-import logging
-#
-#######################################################################################################################
-#
-#
-def ensure_database_directory():
-    """Create the directory returned by get_database_dir() if it does not already exist."""
-    database_dir = get_database_dir()
-    os.makedirs(database_dir, exist_ok=True)
-ensure_database_directory()
-# Construct the path to the config file (Config_Files/config.txt)
-config_path = get_project_relative_path('Config_Files/config.txt')
-# Read the config file; configparser's read() skips files it cannot open, so the fallback below then applies
-config = configparser.ConfigParser()
-config.read(config_path)
-# Use [Database] chatDB_path from the config, or get_database_path('chatDB.db') if it is not specified
-chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
-# Printed when this module is loaded so the resolved path is visible
-print(f"Chat Database path: {chat_DB_PATH}")
-########################################################################################################
-#
-# Functions
-# FIXME - Setup properly and test/add documentation for its existence...
-def initialize_database():
-    """
-    Initialize the SQLite database with required tables and FTS5 virtual tables.
-    Opens chat_DB_PATH and creates, if missing: the CharacterCards, CharacterChats and ChatKeywords
-    tables, the CharacterChats_fts virtual table with three triggers that mirror inserts, deletes
-    and updates of CharacterChats, and two indexes on ChatKeywords.
-    Raises:
-        sqlite3.Error: Logged, rolled back and re-raised if a statement fails.
-        Exception: Any other error is likewise logged, rolled back and re-raised.
-    """
-    # Start as None so the except/finally handlers can tell whether connect() succeeded
-    conn = None
-    try:
-        conn = sqlite3.connect(chat_DB_PATH)
-        cursor = conn.cursor()
-        # Enable foreign key constraints
-        cursor.execute("PRAGMA foreign_keys = ON;")
-        # Create CharacterCards table with V2 fields
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS CharacterCards (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            name TEXT UNIQUE NOT NULL,
-            description TEXT,
-            personality TEXT,
-            scenario TEXT,
-            image BLOB,
-            post_history_instructions TEXT,
-            first_mes TEXT,
-            mes_example TEXT,
-            creator_notes TEXT,
-            system_prompt TEXT,
-            alternate_greetings TEXT,
-            tags TEXT,
-            creator TEXT,
-            character_version TEXT,
-            extensions TEXT,
-            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
-        );
-        """)
-        # Create CharacterChats table
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS CharacterChats (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            character_id INTEGER NOT NULL,
-            conversation_name TEXT,
-            chat_history TEXT,
-            is_snapshot BOOLEAN DEFAULT FALSE,
-            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
-            FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
-        );
-        """)
-        # Create FTS5 virtual table for CharacterChats
-        cursor.execute("""
-        CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
-            conversation_name,
-            chat_history,
-            content='CharacterChats',
-            content_rowid='id'
-        );
-        """)
-        # Create triggers to keep FTS5 table in sync with CharacterChats
-        # NOTE(review): CharacterChats_fts is declared with content='CharacterChats'; the SQLite FTS5
-        # docs describe a special 'delete' command for syncing such tables, whereas these triggers
-        # use plain DELETE/UPDATE statements - confirm the index stays consistent.
-        cursor.executescript("""
-        CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
-            INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
-            VALUES (new.id, new.conversation_name, new.chat_history);
-        END;
-        CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
-            DELETE FROM CharacterChats_fts WHERE rowid = old.id;
-        END;
-        CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
-            UPDATE CharacterChats_fts SET conversation_name = new.conversation_name, chat_history = new.chat_history
-            WHERE rowid = new.id;
-        END;
-        """)
-        # Create ChatKeywords table
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS ChatKeywords (
-            chat_id INTEGER NOT NULL,
-            keyword TEXT NOT NULL,
-            FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
-        );
-        """)
-        # Create indexes for faster searches
-        cursor.execute("""
-        CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
-        """)
-        cursor.execute("""
-        CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
-        """)
-        conn.commit()
-        logging.info("Database initialized successfully.")
-    except sqlite3.Error as e:
-        logging.error(f"SQLite error occurred during database initialization: {e}")
-        if conn:
-            conn.rollback()
-        # Re-raise so the caller can decide how to react to the failed initialization
-        raise
-    except Exception as e:
-        logging.error(f"Unexpected error occurred during database initialization: {e}")
-        if conn:
-            conn.rollback()
-        raise
-    finally:
-        # Always release the connection, whether initialization succeeded or failed
-        if conn:
-            conn.close()
-# Call initialize_database() at the start of your application
-def setup_chat_database():
-    """Run initialize_database(); on any failure log it as critical and exit the process with status 1."""
-    try:
-        initialize_database()
-    except Exception as init_error:
-        logging.critical(f"Failed to initialize database: {init_error}")
-        sys.exit(1)
-setup_chat_database()
-########################################################################################################
-#
-# Character Card handling
-def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Normalize a character card dict into the flat V2 field set stored in the database.
-    Text fields default to '' when missing; 'alternate_greetings', 'tags' and 'extensions' are
-    serialized to JSON strings (defaulting to an empty list/list/dict). No validation is performed.
-    Args:
-        card_data: The raw character card fields.
-    Returns:
-        A new dict with the V2 fields; 'image' is included only if card_data contains it.
-    """
-    leading_text_fields = (
-        'name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example',
-        'creator_notes', 'system_prompt', 'post_history_instructions',
-    )
-    v2_data = {field: card_data.get(field, '') for field in leading_text_fields}
-    v2_data['alternate_greetings'] = json.dumps(card_data.get('alternate_greetings', []))
-    v2_data['tags'] = json.dumps(card_data.get('tags', []))
-    v2_data['creator'] = card_data.get('creator', '')
-    v2_data['character_version'] = card_data.get('character_version', '')
-    v2_data['extensions'] = json.dumps(card_data.get('extensions', {}))
-    # The image may be binary data, so it is copied through untouched and only when supplied
-    if 'image' in card_data:
-        v2_data['image'] = card_data['image']
-    return v2_data
-def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
-    """
-    Add a character card, or update the existing card that has the same name.
-    Args:
-        card_data: Raw character card fields; normalized with parse_character_card(). 'image' is
-            optional: a new card is stored without an image, an existing card keeps its image.
-    Returns:
-        The ID of the inserted or updated character, or None if the operation failed.
-    """
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        parsed_card = parse_character_card(card_data)
-        # parse_character_card() only sets 'image' when the caller supplied one, so indexing it
-        # directly would raise KeyError for every card without an image.
-        image = parsed_card.get('image')
-        # Check if character already exists
-        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
-        row = cursor.fetchone()
-        if row:
-            # Update existing character; COALESCE keeps the stored image when none is supplied
-            character_id = row[0]
-            update_query = """
-                UPDATE CharacterCards
-                SET description = ?, personality = ?, scenario = ?, image = COALESCE(?, image),
-                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
-                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
-                    tags = ?, creator = ?, character_version = ?, extensions = ?
-                WHERE id = ?
-            """
-            cursor.execute(update_query, (
-                parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
-                image, parsed_card['post_history_instructions'], parsed_card['first_mes'],
-                parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
-                parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
-                parsed_card['character_version'], parsed_card['extensions'], character_id
-            ))
-        else:
-            # Insert new character
-            insert_query = """
-                INSERT INTO CharacterCards (name, description, personality, scenario, image,
-                post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
-                alternate_greetings, tags, creator, character_version, extensions)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """
-            cursor.execute(insert_query, (
-                parsed_card['name'], parsed_card['description'], parsed_card['personality'],
-                parsed_card['scenario'], image, parsed_card['post_history_instructions'],
-                parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
-                parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
-                parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
-            ))
-            character_id = cursor.lastrowid
-        conn.commit()
-        return character_id
-    except sqlite3.IntegrityError as e:
-        logging.error(f"Error adding character card: {e}")
-        return None
-    except Exception as e:
-        logging.error(f"Unexpected error adding character card: {e}")
-        return None
-    finally:
-        conn.close()
-# def add_character_card(card_data: Dict) -> Optional[int]:
-#     """Add or update a character card in the database.
-#
-#     Returns the ID of the inserted character or None if failed.
-#     """
-#     conn = sqlite3.connect(chat_DB_PATH)
-#     cursor = conn.cursor()
-#     try:
-#         # Ensure all required fields are present
-#         required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
-#         for field in required_fields:
-#             if field not in card_data:
-#                 card_data[field] = ''  # Assign empty string if field is missing
-#
-#         # Check if character already exists
-#         cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
-#         row = cursor.fetchone()
-#
-#         if row:
-#             # Update existing character
-#             character_id = row[0]
-#             cursor.execute("""
-#                 UPDATE CharacterCards
-#                 SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
-#                 WHERE id = ?
-#             """, (
-#                 card_data['description'],
-#                 card_data['personality'],
-#                 card_data['scenario'],
-#                 card_data['image'],
-#                 card_data['post_history_instructions'],
-#                 card_data['first_message'],
-#                 character_id
-#             ))
-#         else:
-#             # Insert new character
-#             cursor.execute("""
-#                 INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
-#                 VALUES (?, ?, ?, ?, ?, ?, ?)
-#             """, (
-#                 card_data['name'],
-#                 card_data['description'],
-#                 card_data['personality'],
-#                 card_data['scenario'],
-#                 card_data['image'],
-#                 card_data['post_history_instructions'],
-#                 card_data['first_message']
-#             ))
-#             character_id = cursor.lastrowid
-#
-#         conn.commit()
-#         return cursor.lastrowid
-#     except sqlite3.IntegrityError as e:
-#         logging.error(f"Error adding character card: {e}")
-#         return None
-#     except Exception as e:
-#         logging.error(f"Unexpected error adding character card: {e}")
-#         return None
-#     finally:
-#         conn.close()
-def get_character_cards() -> List[Dict]:
-    """
-    Retrieve all character cards from the database.
-    Returns:
-        One dict per CharacterCards row, keyed by column name.
-    Raises:
-        sqlite3.Error: Propagated unchanged if the query fails; the connection is still closed.
-    """
-    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
-    conn = sqlite3.connect(chat_DB_PATH)
-    try:
-        cursor = conn.cursor()
-        cursor.execute("SELECT * FROM CharacterCards")
-        rows = cursor.fetchall()
-        columns = [description[0] for description in cursor.description]
-    finally:
-        # Close even when the query raises, so a failed call does not leak the connection
-        conn.close()
-    characters = [dict(zip(columns, row)) for row in rows]
-    #logging.debug(f"Characters fetched from DB: {characters}")
-    return characters
-def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
-    """
-    Look up one character card.
-    Args:
-        character_id: An integer row ID, or a dict that is treated as an already-loaded card.
-    Returns:
-        The dict that was passed in, the matching row as a dict keyed by column name, or None when
-        the ID is unknown, the argument has another type, or an error occurred.
-    """
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        # A dict is assumed to be a character card already and is handed straight back
-        if isinstance(character_id, dict):
-            return character_id
-        if not isinstance(character_id, int):
-            logging.warning(f"Invalid type for character_id: {type(character_id)}")
-            return None
-        cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
-        record = cursor.fetchone()
-        if not record:
-            return None
-        field_names = [column[0] for column in cursor.description]
-        return dict(zip(field_names, record))
-    except Exception as e:
-        logging.error(f"Error in get_character_card_by_id: {e}")
-        return None
-    finally:
-        conn.close()
-def update_character_card(character_id: int, card_data: Dict) -> bool:
-    """
-    Update an existing character card.
-    Args:
-        character_id: ID of the CharacterCards row to update.
-        card_data: New values. The greeting is read from 'first_mes', falling back to the legacy
-            'first_message' key and then to a default greeting.
-    Returns:
-        True if a row was updated, False if no row matched or a database error occurred.
-    """
-    # The CharacterCards schema names the greeting column 'first_mes'; accept the older
-    # 'first_message' key from callers but never use it as a column name.
-    first_mes = card_data.get('first_mes', card_data.get('first_message', "Hello! I'm ready to chat."))
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        cursor.execute("""
-            UPDATE CharacterCards
-            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
-            WHERE id = ?
-        """, (
-            card_data.get('name'),
-            card_data.get('description'),
-            card_data.get('personality'),
-            card_data.get('scenario'),
-            card_data.get('image'),
-            card_data.get('post_history_instructions', ''),
-            first_mes,
-            character_id
-        ))
-        conn.commit()
-        return cursor.rowcount > 0
-    except sqlite3.Error as e:
-        # sqlite3.Error includes IntegrityError (e.g. duplicate name) and operational failures
-        logging.error(f"Error updating character card: {e}")
-        return False
-    finally:
-        conn.close()
-def delete_character_card(character_id: int) -> bool:
-    """
-    Remove a character card together with the chats that reference it.
-    Returns:
-        True if a CharacterCards row was deleted, False if none matched or a database error occurred.
-    """
-    # Chats are removed before the card they reference; rowcount afterwards reflects the card delete
-    delete_statements = (
-        "DELETE FROM CharacterChats WHERE character_id = ?",
-        "DELETE FROM CharacterCards WHERE id = ?",
-    )
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        for statement in delete_statements:
-            cursor.execute(statement, (character_id,))
-        conn.commit()
-        return cursor.rowcount > 0
-    except sqlite3.Error as e:
-        logging.error(f"Error deleting character card: {e}")
-        return False
-    finally:
-        conn.close()
-def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
-    """
-    Store a new chat for a character and, if given, its keywords.
-    Args:
-        character_id (int): The ID of the character.
-        conversation_name (str): Name of the conversation.
-        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples; saved as JSON text.
-        keywords (Optional[List[str]]): Keywords for this chat; each is stripped and lower-cased.
-        is_snapshot (bool, optional): Whether this chat is a snapshot.
-    Returns:
-        Optional[int]: The ID of the inserted chat, or None if a database error occurred.
-    """
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        serialized_history = json.dumps(chat_history)
-        chat_row = (character_id, conversation_name, serialized_history, is_snapshot)
-        cursor.execute("""
-            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
-            VALUES (?, ?, ?, ?)
-        """, chat_row)
-        chat_id = cursor.lastrowid
-        if keywords:
-            # One ChatKeywords row per keyword, all tied to the chat inserted above
-            normalized = [(chat_id, word.strip().lower()) for word in keywords]
-            cursor.executemany("""
-                INSERT INTO ChatKeywords (chat_id, keyword)
-                VALUES (?, ?)
-            """, normalized)
-        conn.commit()
-        return chat_id
-    except sqlite3.Error as e:
-        logging.error(f"Error adding character chat: {e}")
-        return None
-    finally:
-        conn.close()
-def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
-    """
-    Retrieve all chats, or chats for a specific character if character_id is provided.
-    Args:
-        character_id: Restrict the result to this character; None returns every chat.
-    Returns:
-        One dict per CharacterChats row, keyed by column name ('chat_history' stays a JSON string).
-    Raises:
-        sqlite3.Error: Propagated unchanged if the query fails; the connection is still closed.
-    """
-    conn = sqlite3.connect(chat_DB_PATH)
-    try:
-        cursor = conn.cursor()
-        if character_id is not None:
-            cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
-        else:
-            cursor.execute("SELECT * FROM CharacterChats")
-        rows = cursor.fetchall()
-        columns = [description[0] for description in cursor.description]
-    finally:
-        # Close even when the query raises, so a failed call does not leak the connection
-        conn.close()
-    return [dict(zip(columns, row)) for row in rows]
-def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
-    """
-    Retrieve a single chat by its ID.
-    Args:
-        chat_id: ID of the CharacterChats row.
-    Returns:
-        The row as a dict keyed by column name with 'chat_history' decoded from JSON (an empty
-        list when the stored value is NULL or empty), or None if no chat has this ID.
-    Raises:
-        sqlite3.Error: Propagated unchanged if the query fails; the connection is still closed.
-    """
-    conn = sqlite3.connect(chat_DB_PATH)
-    try:
-        cursor = conn.cursor()
-        cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
-        row = cursor.fetchone()
-        columns = [description[0] for description in cursor.description]
-    finally:
-        # Close even when the query raises, so a failed call does not leak the connection
-        conn.close()
-    if not row:
-        return None
-    chat = dict(zip(columns, row))
-    # The chat_history column is nullable; json.loads(None) would raise TypeError
-    chat['chat_history'] = json.loads(chat['chat_history']) if chat['chat_history'] else []
-    return chat
-def search_character_chats(query: str) -> Tuple[List[Dict], str]:
-    """
-    Search for character chats using FTS5.
-    Args:
-        query (str): The search query.
-    Returns:
-        Tuple[List[Dict], str]: A list of matching chats and a status message.
-    """
-    if not query.strip():
-        return [], "Please enter a search query."
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        # Use parameterized queries to prevent SQL injection
-        cursor.execute("""
-            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
-            FROM CharacterChats_fts
-            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
-            WHERE CharacterChats_fts MATCH ?
-            ORDER BY rank
-        """, (query,))
-        rows = cursor.fetchall()
-        columns = [description[0] for description in cursor.description]
-        results = [dict(zip(columns, row)) for row in rows]
-        status_message = f"Found {len(results)} chat(s) matching '{query}'."
-        return results, status_message
-    except Exception as e:
-        logging.error(f"Error searching chats with FTS5: {e}")
-        return [], f"Error occurred during search: {e}"
-    finally:
-        conn.close()
-def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
-    """Update an existing chat history."""
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        chat_history_json = json.dumps(chat_history)
-        cursor.execute("""
-            UPDATE CharacterChats
-            SET chat_history = ?
-            WHERE id = ?
-        """, (
-            chat_history_json,
-            chat_id
-        ))
-        conn.commit()
-        return cursor.rowcount > 0
-    except sqlite3.Error as e:
-        logging.error(f"Error updating character chat: {e}")
-        return False
-    finally:
-        conn.close()
-def delete_character_chat(chat_id: int) -> bool:
-    """Delete a specific chat."""
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
-        conn.commit()
-        return cursor.rowcount > 0
-    except sqlite3.Error as e:
-        logging.error(f"Error deleting character chat: {e}")
-        return False
-    finally:
-        conn.close()
-def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
-    """
-    Fetch chat IDs associated with any of the specified keywords.
-    Args:
-        keywords (List[str]): List of keywords to search for.
-    Returns:
-        List[int]: List of chat IDs associated with the keywords.
-    """
-    if not keywords:
-        return []
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        # Construct the WHERE clause to search for each keyword
-        keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
-        sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
-        cursor.execute(sql_query, keywords)
-        rows = cursor.fetchall()
-        chat_ids = [row[0] for row in rows]
-        return chat_ids
-    except Exception as e:
-        logging.error(f"Error in fetch_keywords_for_chats: {e}")
-        return []
-    finally:
-        conn.close()
-def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
-    """Save chat history to the CharacterChats table.
-    Returns the ID of the inserted chat or None if failed.
-    """
-    return add_character_chat(character_id, conversation_name, chat_history)
-def migrate_chat_to_media_db():
-    pass
-def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
-    """
-    Perform a full-text search on specified fields with optional filtering and pagination.
-    Args:
-        query (str): The search query.
-        fields (List[str]): List of fields to search in.
-        where_clause (str, optional): Additional SQL WHERE clause to filter results.
-        page (int, optional): Page number for pagination.
-        results_per_page (int, optional): Number of results per page.
-    Returns:
-        List[Dict[str, Any]]: List of matching chat records with content and metadata.
-    """
-    if not query.strip():
-        return []
-    conn = sqlite3.connect(chat_DB_PATH)
-    cursor = conn.cursor()
-    try:
-        # Construct the MATCH query for FTS5
-        match_query = " AND ".join(fields) + f" MATCH ?"
-        # Adjust the query with the fields
-        fts_query = f"""
-            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
-            FROM CharacterChats_fts
-            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
-            WHERE {match_query}
-        """
-        if where_clause:
-            fts_query += f" AND ({where_clause})"
-        fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
-        offset = (page - 1) * results_per_page
-        cursor.execute(fts_query, (query, results_per_page, offset))
-        rows = cursor.fetchall()
-        columns = [description[0] for description in cursor.description]
-        results = [dict(zip(columns, row)) for row in rows]
-        return results
-    except Exception as e:
-        logging.error(f"Error in search_db: {e}")
-        return []
-    finally:
-        conn.close()
-def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
-List[Dict[str, Any]]:
-    """
-    Perform a full-text search within the specified chat IDs using FTS5.
-    Args:
-        query (str): The user's query.
-        relevant_chat_ids (List[int]): List of chat IDs to search within.
-        page (int): Pagination page number.
-        results_per_page (int): Number of results per page.
-    Returns:
-        List[Dict[str, Any]]: List of search results with content and metadata.
-    """
-    try:
-        # Construct a WHERE clause to limit the search to relevant chat IDs
-        where_clause = " OR ".join([f"media_id = {chat_id}" for chat_id in relevant_chat_ids])
-        if not where_clause:
-            where_clause = "1"  # No restriction if no chat IDs
-        # Perform full-text search using FTS5
-        fts_results = search_db(query, ["content"], where_clause, page=page, results_per_page=results_per_page)
-        filtered_fts_results = [
-            {
-                "content": result['content'],
-                "metadata": {"media_id": result['id']}
-            }
-            for result in fts_results
-            if result['id'] in relevant_chat_ids
-        ]
-        return filtered_fts_results
-    except Exception as e:
-        logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
-        return []
-def fetch_all_chats() -> List[Dict[str, Any]]:
-    """
-    Fetch all chat messages from the database.
-    Returns:
-        List[Dict[str, Any]]: List of chat messages with relevant metadata.
-    """
-    try:
-        chats = get_character_chats()  # Modify this function to retrieve all chats
-        return chats
-    except Exception as e:
-        logging.error(f"Error fetching all chats: {str(e)}")
-        return []
-#
-# End of Character_Chat_DB.py
-#######################################################################################################################

+# Character_Chat_DB.py
+# Database functions for managing character cards and chat histories.
+#
+# Imports
+import configparser
+import sqlite3
+import json
+import os
+import sys
+from typing import List, Dict, Optional, Tuple, Any, Union
+from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
+from Tests.Chat_APIs.Chat_APIs_Integration_test import logging
+#
+#######################################################################################################################
+#
+#
+def ensure_database_directory():
+    os.makedirs(get_database_dir(), exist_ok=True)
+ensure_database_directory()
+# Resolve the config file location relative to the project root.
+config_path = get_project_relative_path('Config_Files/config.txt')
+# Load the config file. ConfigParser.read() skips files it cannot open, so a
+# missing config file simply leaves the parser empty.
+config = configparser.ConfigParser()
+config.read(config_path)
+# Path of the chat database: the 'chatDB_path' option of the [Database] section,
+# falling back to get_database_path('chatDB.db') when that option is not set.
+chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
+# Printed once at import time so the active database file is visible at startup.
+print(f"Chat Database path: {chat_DB_PATH}")
+########################################################################################################
+#
+# Functions
+# FIXME - Setup properly and test/add documentation for its existence...
+def initialize_database():
+    """Initialize the SQLite database with required tables and FTS5 virtual tables."""
+    conn = None
+    try:
+        conn = sqlite3.connect(chat_DB_PATH)
+        cursor = conn.cursor()
+        # Enable foreign key constraints
+        cursor.execute("PRAGMA foreign_keys = ON;")
+        # Create CharacterCards table with V2 fields
+        cursor.execute("""
+        CREATE TABLE IF NOT EXISTS CharacterCards (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT UNIQUE NOT NULL,
+            description TEXT,
+            personality TEXT,
+            scenario TEXT,
+            image BLOB,
+            post_history_instructions TEXT,
+            first_mes TEXT,
+            mes_example TEXT,
+            creator_notes TEXT,
+            system_prompt TEXT,
+            alternate_greetings TEXT,
+            tags TEXT,
+            creator TEXT,
+            character_version TEXT,
+            extensions TEXT,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+        );
+        """)
+        # Create CharacterChats table
+        cursor.execute("""
+        CREATE TABLE IF NOT EXISTS CharacterChats (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            character_id INTEGER NOT NULL,
+            conversation_name TEXT,
+            chat_history TEXT,
+            is_snapshot BOOLEAN DEFAULT FALSE,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
+        );
+        """)
+        # Create FTS5 virtual table for CharacterChats
+        cursor.execute("""
+        CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
+            conversation_name,
+            chat_history,
+            content='CharacterChats',
+            content_rowid='id'
+        );
+        """)
+        # Create triggers to keep FTS5 table in sync with CharacterChats
+        cursor.executescript("""
+        CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
+            INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
+            VALUES (new.id, new.conversation_name, new.chat_history);
+        END;
+        CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
+            DELETE FROM CharacterChats_fts WHERE rowid = old.id;
+        END;
+        CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
+            UPDATE CharacterChats_fts SET conversation_name = new.conversation_name, chat_history = new.chat_history
+            WHERE rowid = new.id;
+        END;
+        """)
+        # Create ChatKeywords table
+        cursor.execute("""
+        CREATE TABLE IF NOT EXISTS ChatKeywords (
+            chat_id INTEGER NOT NULL,
+            keyword TEXT NOT NULL,
+            FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
+        );
+        """)
+        # Create indexes for faster searches
+        cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
+        """)
+        cursor.execute("""
+        CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
+        """)
+        conn.commit()
+        logging.info("Database initialized successfully.")
+    except sqlite3.Error as e:
+        logging.error(f"SQLite error occurred during database initialization: {e}")
+        if conn:
+            conn.rollback()
+        raise
+    except Exception as e:
+        logging.error(f"Unexpected error occurred during database initialization: {e}")
+        if conn:
+            conn.rollback()
+        raise
+    finally:
+        if conn:
+            conn.close()
+# Call initialize_database() at the start of your application
+def setup_chat_database():
+    try:
+        initialize_database()
+    except Exception as e:
+        logging.critical(f"Failed to initialize database: {e}")
+        sys.exit(1)
+setup_chat_database()
+########################################################################################################
+#
+# Character Card handling
+def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Parse and validate a character card according to V2 specification."""
+    v2_data = {
+        'name': card_data.get('name', ''),
+        'description': card_data.get('description', ''),
+        'personality': card_data.get('personality', ''),
+        'scenario': card_data.get('scenario', ''),
+        'first_mes': card_data.get('first_mes', ''),
+        'mes_example': card_data.get('mes_example', ''),
+        'creator_notes': card_data.get('creator_notes', ''),
+        'system_prompt': card_data.get('system_prompt', ''),
+        'post_history_instructions': card_data.get('post_history_instructions', ''),
+        'alternate_greetings': json.dumps(card_data.get('alternate_greetings', [])),
+        'tags': json.dumps(card_data.get('tags', [])),
+        'creator': card_data.get('creator', ''),
+        'character_version': card_data.get('character_version', ''),
+        'extensions': json.dumps(card_data.get('extensions', {}))
+    }
+    # Handle 'image' separately as it might be binary data
+    if 'image' in card_data:
+        v2_data['image'] = card_data['image']
+    return v2_data
+def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
+    """Add or update a character card in the database."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        parsed_card = parse_character_card(card_data)
+        # Check if character already exists
+        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
+        row = cursor.fetchone()
+        if row:
+            # Update existing character
+            character_id = row[0]
+            update_query = """
+                UPDATE CharacterCards
+                SET description = ?, personality = ?, scenario = ?, image = ?,
+                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
+                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
+                    tags = ?, creator = ?, character_version = ?, extensions = ?
+                WHERE id = ?
+            """
+            cursor.execute(update_query, (
+                parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
+                parsed_card['image'], parsed_card['post_history_instructions'], parsed_card['first_mes'],
+                parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
+                parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
+                parsed_card['character_version'], parsed_card['extensions'], character_id
+            ))
+        else:
+            # Insert new character
+            insert_query = """
+                INSERT INTO CharacterCards (name, description, personality, scenario, image,
+                post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
+                alternate_greetings, tags, creator, character_version, extensions)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """
+            cursor.execute(insert_query, (
+                parsed_card['name'], parsed_card['description'], parsed_card['personality'],
+                parsed_card['scenario'], parsed_card['image'], parsed_card['post_history_instructions'],
+                parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
+                parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
+                parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
+            ))
+            character_id = cursor.lastrowid
+        conn.commit()
+        return character_id
+    except sqlite3.IntegrityError as e:
+        logging.error(f"Error adding character card: {e}")
+        return None
+    except Exception as e:
+        logging.error(f"Unexpected error adding character card: {e}")
+        return None
+    finally:
+        conn.close()
+# def add_character_card(card_data: Dict) -> Optional[int]:
+#     """Add or update a character card in the database.
+#
+#     Returns the ID of the inserted character or None if failed.
+#     """
+#     conn = sqlite3.connect(chat_DB_PATH)
+#     cursor = conn.cursor()
+#     try:
+#         # Ensure all required fields are present
+#         required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
+#         for field in required_fields:
+#             if field not in card_data:
+#                 card_data[field] = ''  # Assign empty string if field is missing
+#
+#         # Check if character already exists
+#         cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
+#         row = cursor.fetchone()
+#
+#         if row:
+#             # Update existing character
+#             character_id = row[0]
+#             cursor.execute("""
+#                 UPDATE CharacterCards
+#                 SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
+#                 WHERE id = ?
+#             """, (
+#                 card_data['description'],
+#                 card_data['personality'],
+#                 card_data['scenario'],
+#                 card_data['image'],
+#                 card_data['post_history_instructions'],
+#                 card_data['first_message'],
+#                 character_id
+#             ))
+#         else:
+#             # Insert new character
+#             cursor.execute("""
+#                 INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
+#                 VALUES (?, ?, ?, ?, ?, ?, ?)
+#             """, (
+#                 card_data['name'],
+#                 card_data['description'],
+#                 card_data['personality'],
+#                 card_data['scenario'],
+#                 card_data['image'],
+#                 card_data['post_history_instructions'],
+#                 card_data['first_message']
+#             ))
+#             character_id = cursor.lastrowid
+#
+#         conn.commit()
+#         return cursor.lastrowid
+#     except sqlite3.IntegrityError as e:
+#         logging.error(f"Error adding character card: {e}")
+#         return None
+#     except Exception as e:
+#         logging.error(f"Unexpected error adding character card: {e}")
+#         return None
+#     finally:
+#         conn.close()
+def get_character_cards() -> List[Dict]:
+    """Retrieve all character cards from the database."""
+    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM CharacterCards")
+    rows = cursor.fetchall()
+    columns = [description[0] for description in cursor.description]
+    conn.close()
+    characters = [dict(zip(columns, row)) for row in rows]
+    #logging.debug(f"Characters fetched from DB: {characters}")
+    return characters
+def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """
+    Retrieve a single character card by its ID.
+    Args:
+        character_id: Can be either an integer ID or a dictionary containing character data.
+    Returns:
+        A dictionary containing the character card data, or None if not found.
+    """
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        if isinstance(character_id, dict):
+            # If a dictionary is passed, assume it's already a character card
+            return character_id
+        elif isinstance(character_id, int):
+            # If an integer is passed, fetch the character from the database
+            cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
+            row = cursor.fetchone()
+            if row:
+                columns = [description[0] for description in cursor.description]
+                return dict(zip(columns, row))
+        else:
+            logging.warning(f"Invalid type for character_id: {type(character_id)}")
+        return None
+    except Exception as e:
+        logging.error(f"Error in get_character_card_by_id: {e}")
+        return None
+    finally:
+        conn.close()
+def update_character_card(character_id: int, card_data: Dict) -> bool:
+    """Update an existing character card."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        cursor.execute("""
+            UPDATE CharacterCards
+            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
+            WHERE id = ?
+        """, (
+            card_data.get('name'),
+            card_data.get('description'),
+            card_data.get('personality'),
+            card_data.get('scenario'),
+            card_data.get('image'),
+            card_data.get('post_history_instructions', ''),
+            card_data.get('first_message', "Hello! I'm ready to chat."),
+            character_id
+        ))
+        conn.commit()
+        return cursor.rowcount > 0
+    except sqlite3.IntegrityError as e:
+        logging.error(f"Error updating character card: {e}")
+        return False
+    finally:
+        conn.close()
+def delete_character_card(character_id: int) -> bool:
+    """Delete a character card and its associated chats."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        # Delete associated chats first due to foreign key constraint
+        cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
+        cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
+        conn.commit()
+        return cursor.rowcount > 0
+    except sqlite3.Error as e:
+        logging.error(f"Error deleting character card: {e}")
+        return False
+    finally:
+        conn.close()
+def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
+    """
+    Add a new chat history for a character, optionally associating keywords.
+    Args:
+        character_id (int): The ID of the character.
+        conversation_name (str): Name of the conversation.
+        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
+        keywords (Optional[List[str]]): List of keywords to associate with this chat.
+        is_snapshot (bool, optional): Whether this chat is a snapshot.
+    Returns:
+        Optional[int]: The ID of the inserted chat or None if failed.
+    """
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        chat_history_json = json.dumps(chat_history)
+        cursor.execute("""
+            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
+            VALUES (?, ?, ?, ?)
+        """, (
+            character_id,
+            conversation_name,
+            chat_history_json,
+            is_snapshot
+        ))
+        chat_id = cursor.lastrowid
+        if keywords:
+            # Insert keywords into ChatKeywords table
+            keyword_records = [(chat_id, keyword.strip().lower()) for keyword in keywords]
+            cursor.executemany("""
+                INSERT INTO ChatKeywords (chat_id, keyword)
+                VALUES (?, ?)
+            """, keyword_records)
+        conn.commit()
+        return chat_id
+    except sqlite3.Error as e:
+        logging.error(f"Error adding character chat: {e}")
+        return None
+    finally:
+        conn.close()
+def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
+    """Retrieve all chats, or chats for a specific character if character_id is provided."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    if character_id is not None:
+        cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
+    else:
+        cursor.execute("SELECT * FROM CharacterChats")
+    rows = cursor.fetchall()
+    columns = [description[0] for description in cursor.description]
+    conn.close()
+    return [dict(zip(columns, row)) for row in rows]
+def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
+    """Retrieve a single chat by its ID."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
+    row = cursor.fetchone()
+    conn.close()
+    if row:
+        columns = [description[0] for description in cursor.description]
+        chat = dict(zip(columns, row))
+        chat['chat_history'] = json.loads(chat['chat_history'])
+        return chat
+    return None
+def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
+    """
+    Search for character chats using FTS5, optionally filtered by character_id.
+    Args:
+        query (str): The search query.
+        character_id (Optional[int]): The ID of the character to filter chats by.
+    Returns:
+        Tuple[List[Dict], str]: A list of matching chats and a status message.
+    """
+    if not query.strip():
+        return [], "Please enter a search query."
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        if character_id is not None:
+            # Search with character_id filter
+            cursor.execute("""
+                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
+                FROM CharacterChats_fts
+                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
+                WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
+                ORDER BY rank
+            """, (query, character_id))
+        else:
+            # Search without character_id filter
+            cursor.execute("""
+                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
+                FROM CharacterChats_fts
+                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
+                WHERE CharacterChats_fts MATCH ?
+                ORDER BY rank
+            """, (query,))
+        rows = cursor.fetchall()
+        columns = [description[0] for description in cursor.description]
+        results = [dict(zip(columns, row)) for row in rows]
+        if character_id is not None:
+            status_message = f"Found {len(results)} chat(s) matching '{query}' for the selected character."
+        else:
+            status_message = f"Found {len(results)} chat(s) matching '{query}' across all characters."
+        return results, status_message
+    except Exception as e:
+        logging.error(f"Error searching chats with FTS5: {e}")
+        return [], f"Error occurred during search: {e}"
+    finally:
+        conn.close()
+def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
+    """Update an existing chat history."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        chat_history_json = json.dumps(chat_history)
+        cursor.execute("""
+            UPDATE CharacterChats
+            SET chat_history = ?
+            WHERE id = ?
+        """, (
+            chat_history_json,
+            chat_id
+        ))
+        conn.commit()
+        return cursor.rowcount > 0
+    except sqlite3.Error as e:
+        logging.error(f"Error updating character chat: {e}")
+        return False
+    finally:
+        conn.close()
+def delete_character_chat(chat_id: int) -> bool:
+    """Delete a specific chat."""
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
+        conn.commit()
+        return cursor.rowcount > 0
+    except sqlite3.Error as e:
+        logging.error(f"Error deleting character chat: {e}")
+        return False
+    finally:
+        conn.close()
+def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
+    """
+    Fetch chat IDs associated with any of the specified keywords.
+    Args:
+        keywords (List[str]): List of keywords to search for.
+    Returns:
+        List[int]: List of chat IDs associated with the keywords.
+    """
+    if not keywords:
+        return []
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        # Construct the WHERE clause to search for each keyword
+        keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
+        sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
+        cursor.execute(sql_query, keywords)
+        rows = cursor.fetchall()
+        chat_ids = [row[0] for row in rows]
+        return chat_ids
+    except Exception as e:
+        logging.error(f"Error in fetch_keywords_for_chats: {e}")
+        return []
+    finally:
+        conn.close()
+def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
+    """Save chat history to the CharacterChats table.
+    Returns the ID of the inserted chat or None if failed.
+    """
+    return add_character_chat(character_id, conversation_name, chat_history)
+def migrate_chat_to_media_db():
+    pass
+def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
+    """
+    Perform a full-text search on specified fields with optional filtering and pagination.
+    Args:
+        query (str): The search query.
+        fields (List[str]): List of fields to search in.
+        where_clause (str, optional): Additional SQL WHERE clause to filter results.
+        page (int, optional): Page number for pagination.
+        results_per_page (int, optional): Number of results per page.
+    Returns:
+        List[Dict[str, Any]]: List of matching chat records with content and metadata.
+    """
+    if not query.strip():
+        return []
+    conn = sqlite3.connect(chat_DB_PATH)
+    cursor = conn.cursor()
+    try:
+        # Construct the MATCH query for FTS5
+        match_query = " AND ".join(fields) + f" MATCH ?"
+        # Adjust the query with the fields
+        fts_query = f"""
+            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
+            FROM CharacterChats_fts
+            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
+            WHERE {match_query}
+        """
+        if where_clause:
+            fts_query += f" AND ({where_clause})"
+        fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
+        offset = (page - 1) * results_per_page
+        cursor.execute(fts_query, (query, results_per_page, offset))
+        rows = cursor.fetchall()
+        columns = [description[0] for description in cursor.description]
+        results = [dict(zip(columns, row)) for row in rows]
+        return results
+    except Exception as e:
+        logging.error(f"Error in search_db: {e}")
+        return []
+    finally:
+        conn.close()
+def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
+List[Dict[str, Any]]:
+    """
+    Perform a full-text search within the specified chat IDs using FTS5.
+    Args:
+        query (str): The user's query.
+        relevant_chat_ids (List[int]): List of chat IDs to search within.
+        page (int): Pagination page number.
+        results_per_page (int): Number of results per page.
+    Returns:
+        List[Dict[str, Any]]: List of search results with content and metadata.
+    """
+    try:
+        # Construct a WHERE clause to limit the search to relevant chat IDs
+        where_clause = " OR ".join([f"media_id = {chat_id}" for chat_id in relevant_chat_ids])
+        if not where_clause:
+            where_clause = "1"  # No restriction if no chat IDs
+        # Perform full-text search using FTS5
+        fts_results = search_db(query, ["content"], where_clause, page=page, results_per_page=results_per_page)
+        filtered_fts_results = [
+            {
+                "content": result['content'],
+                "metadata": {"media_id": result['id']}
+            }
+            for result in fts_results
+            if result['id'] in relevant_chat_ids
+        ]
+        return filtered_fts_results
+    except Exception as e:
+        logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
+        return []
+def fetch_all_chats() -> List[Dict[str, Any]]:
+    """
+    Fetch all chat messages from the database.
+    Returns:
+        List[Dict[str, Any]]: List of chat messages with relevant metadata.
+    """
+    try:
+        chats = get_character_chats()  # Modify this function to retrieve all chats
+        return chats
+    except Exception as e:
+        logging.error(f"Error fetching all chats: {str(e)}")
+        return []
+#
+# End of Character_Chat_DB.py
+#######################################################################################################################

App_Function_Libraries/DB/DB_Manager.py CHANGED Viewed

@@ -309,7 +309,10 @@ def add_media_to_database(*args, **kwargs):
         result = sqlite_add_media_to_database(*args, **kwargs)
         # Extract content
-        segments = args[2]
         if isinstance(segments, list):
             content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
         elif isinstance(segments, dict):

         result = sqlite_add_media_to_database(*args, **kwargs)
         # Extract content
+        segments = kwargs.get('segments') if 'segments' in kwargs else args[2] if len(args) > 2 else None
+        if segments is None:
+            raise ValueError("Segments not provided in arguments")
         if isinstance(segments, list):
             content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
         elif isinstance(segments, dict):

App_Function_Libraries/DB/SQLite_DB.py CHANGED Viewed

@@ -1181,8 +1181,6 @@ def is_valid_date(date_string: str) -> bool:
         return False
 def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
     if db is None:
         db = Database()
@@ -1196,6 +1194,7 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
                 url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
                 url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
             # Extract content from segments
             if isinstance(segments, list):
@@ -1217,15 +1216,24 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
             cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
             existing_media = cursor.fetchone()
             if existing_media:
                 if overwrite:
-                    media_id = existing_media[0]
                     cursor.execute('''
                     UPDATE Media
                     SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
                     WHERE id = ?
                     ''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
                           info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
             else:
                 cursor.execute('''
                 INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
@@ -1233,12 +1241,17 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
                 ''', (url, info_dict.get('title', 'Untitled'), media_type, content,
                       info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
                 media_id = cursor.lastrowid
-            # Add modification
-            cursor.execute('''
-            INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
-            VALUES (?, ?, ?, ?)
-            ''', (media_id, custom_prompt_input, summary, datetime.now().strftime('%Y-%m-%d')))
             # Process keywords
             for keyword in keyword_list:
@@ -1266,7 +1279,8 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
         schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
         action = "updated" if existing_media and overwrite else "added"
-        return f"Media '{info_dict.get('title', 'Untitled')}' {action} successfully with URL: {url} and keywords: {', '.join(keyword_list)}. Chunking scheduled."
     except DatabaseError as e:
         logging.error(f"Database error: {e}")

         return False
 def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
     if db is None:
         db = Database()
                 url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
                 url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
+            logging.debug(f"Checking for existing media with URL: {url}")
             # Extract content from segments
             if isinstance(segments, list):
             cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
             existing_media = cursor.fetchone()
+            logging.debug(f"Existing media: {existing_media}")
+            logging.debug(f"Overwrite flag: {overwrite}")
             if existing_media:
+                media_id = existing_media[0]
+                logging.debug(f"Existing media_id: {media_id}")
                 if overwrite:
+                    logging.debug("Updating existing media")
                     cursor.execute('''
                     UPDATE Media
                     SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
                     WHERE id = ?
                     ''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
                           info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
+                    action = "updated"
+                else:
+                    logging.debug("Media exists but not updating (overwrite=False)")
+                    action = "already exists (not updated)"
             else:
                 cursor.execute('''
                 INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
                 ''', (url, info_dict.get('title', 'Untitled'), media_type, content,
                       info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
                 media_id = cursor.lastrowid
+                action = "added"
+                logging.debug(f"New media_id: {media_id}")
+            logging.debug(f"Before MediaModifications insert, media_id: {media_id}")
+            # Only proceed with modifications if the media was added or updated
+            if action in ["updated", "added"]:
+                cursor.execute('''
+                INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
+                VALUES (?, ?, ?, ?)
+                ''', (media_id, custom_prompt_input, summary, datetime.now().strftime('%Y-%m-%d')))
             # Process keywords
             for keyword in keyword_list:
         schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
         action = "updated" if existing_media and overwrite else "added"
+        return f"Media '{info_dict.get('title', 'Untitled')}' {action} with URL: {url}" + \
+            (f" and keywords: {', '.join(keyword_list)}. Chunking scheduled." if action in ["updated", "added"] else "")
     except DatabaseError as e:
         logging.error(f"Database error: {e}")

App_Function_Libraries/Utils/Utils.py CHANGED Viewed

@@ -15,8 +15,6 @@
 # 6. normalize_title(title)
 # 7.
 #
-#
-#
 ####################
 #
 # Import necessary libraries
@@ -256,6 +254,7 @@ def load_and_log_configs():
         logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
         logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
         # Retrieve output paths from the configuration file
         output_path = config.get('Paths', 'output_path', fallback='results')
         logging.debug(f"Output path set to: {output_path}")
@@ -264,6 +263,18 @@ def load_and_log_configs():
         processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
         logging.debug(f"Processing choice set to: {processing_choice}")
         # Prompts - FIXME
         prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
@@ -320,6 +331,16 @@ def load_and_log_configs():
                 'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
                 'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
             },
         }
     except Exception as e:
@@ -513,31 +534,49 @@ def create_download_directory(title):
     return session_path
 def safe_read_file(file_path):
-    encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']
     try:
         with open(file_path, 'rb') as file:
             raw_data = file.read()
     except FileNotFoundError:
         return f"File not found: {file_path}"
     except Exception as e:
         return f"An error occurred while reading the file: {e}"
     # Use chardet to detect the encoding
     detected = chardet.detect(raw_data)
     if detected['encoding'] is not None:
         encodings.insert(0, detected['encoding'])
     for encoding in encodings:
         try:
             decoded_content = raw_data.decode(encoding)
-            if decoded_content.isprintable():
                 return decoded_content
         except UnicodeDecodeError:
             continue
-    return f"Unable to decode the file {file_path} with any of the attempted encodings: {encodings}"
 #
 # End of Files-saving Function Definitions

 # 6. normalize_title(title)
 # 7.
 #
 ####################
 #
 # Import necessary libraries
         logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
         logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
         # Retrieve output paths from the configuration file
         output_path = config.get('Paths', 'output_path', fallback='results')
         logging.debug(f"Output path set to: {output_path}")
         processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
         logging.debug(f"Processing choice set to: {processing_choice}")
+        # Retrieve Embedding model settings from the configuration file
+        embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
+        logging.debug(f"Embedding model set to: {embedding_model}")
+        embedding_provider = config.get('Embeddings', 'embedding_provider', fallback='')
+        embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
+        onnx_model_path = config.get('Embeddings', 'onnx_model_path', fallback="./App_Function_Libraries/onnx_models/text-embedding-3-small.onnx")
+        model_dir = config.get('Embeddings', 'model_dir', fallback="./App_Function_Libraries/onnx_models")
+        embedding_api_url = config.get('Embeddings', 'embedding_api_url', fallback="http://localhost:8080/v1/embeddings")
+        embedding_api_key = config.get('Embeddings', 'embedding_api_key', fallback='')
+        chunk_size = config.get('Embeddings', 'chunk_size', fallback=400)
+        overlap = config.get('Embeddings', 'overlap', fallback=200)
         # Prompts - FIXME
         prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
                 'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
                 'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
             },
+            'embedding_config': {
+                'embedding_provider': embedding_provider,
+                'embedding_model': embedding_model,
+                'onnx_model_path': onnx_model_path,
+                'model_dir': model_dir,
+                'embedding_api_url': embedding_api_url,
+                'embedding_api_key': embedding_api_key,
+                'chunk_size': chunk_size,
+                'overlap': overlap
+            }
         }
     except Exception as e:
     return session_path
+import chardet
+import logging
 def safe_read_file(file_path):
+    """
+    Read a file and return its text, trying several encodings until one decodes plausibly.
+    Args:
+        file_path: Path of the file to read.
+    Returns:
+        str: The decoded content; "" for an empty file or one that decodes to
+        nothing (e.g. only a byte-order mark); otherwise a human-readable error
+        message when the file is missing, unreadable, or cannot be decoded.
+        Failures are reported through the return value, not raised.
+    """
+    # 'utf-8-sig' is tried before 'utf-8': it accepts plain UTF-8 as well and strips a
+    # leading BOM, whereas 'utf-8' decodes BOM files successfully but keeps the BOM character.
+    encodings = ['utf-8-sig', 'utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']
+    logging.info(f"Attempting to read file: {file_path}")
     try:
         with open(file_path, 'rb') as file:
             raw_data = file.read()
     except FileNotFoundError:
+        logging.error(f"File not found: {file_path}")
         return f"File not found: {file_path}"
     except Exception as e:
+        logging.error(f"An error occurred while reading the file: {e}")
         return f"An error occurred while reading the file: {e}"
+    if not raw_data:
+        logging.warning(f"File is empty: {file_path}")
+        return ""
     # Use chardet to detect the encoding
     detected = chardet.detect(raw_data)
     if detected['encoding'] is not None:
         encodings.insert(0, detected['encoding'])
+        logging.info(f"Detected encoding: {detected['encoding']}")
     for encoding in encodings:
         try:
             decoded_content = raw_data.decode(encoding)
+        except (UnicodeDecodeError, LookupError):
+            # LookupError covers a detected encoding name that Python has no codec for.
+            logging.debug(f"Failed to decode with {encoding}")
+            continue
+        if not decoded_content:
+            # Non-empty bytes can still decode to nothing (a file holding only a BOM);
+            # return early rather than divide by zero in the ratio below.
+            logging.warning(f"File is empty: {file_path}")
+            return ""
+        # Check if the content is mostly printable. Tab, newline and carriage return
+        # count as readable: str.isprintable() is False for them, so ordinary text
+        # with short lines would otherwise fail the threshold for every encoding.
+        readable = sum(c.isprintable() or c in "\t\n\r" for c in decoded_content)
+        if readable / len(decoded_content) > 0.95:
+            logging.info(f"Successfully decoded file with encoding: {encoding}")
+            return decoded_content
+    # If all decoding attempts fail, return the error message
+    logging.error(f"Unable to decode the file {file_path}")
+    return f"Unable to decode the file {file_path}"
 #
 # End of Files-saving Function Definitions