Spaces:

Zelyanoth
/

Lin

Running

Lin

File size: 7,969 Bytes

import re
import json
import unicodedata
import io
from flask import current_app
from gradio_client import Client
import pandas as pd
from PIL import Image
import base64

class ContentService:
    """Service for AI content generation using Hugging Face models."""
    
    def __init__(self, hugging_key=None):
        # Use provided key or fall back to app config
        self.hugging_key = hugging_key or current_app.config.get('HUGGING_KEY')
        # Initialize the Gradio client for content generation
        self.client = Client("Zelyanoth/Linkedin_poster_dev", hf_token=self.hugging_key)
    
    def validate_unicode_content(self, content):
        """Validate Unicode content while preserving original formatting and spaces."""
        if not content or not isinstance(content, str):
            return content
        
        try:
            # Test if content can be encoded as UTF-8
            content.encode('utf-8')
            return content  # Return original content if it's valid UTF-8
        except UnicodeEncodeError:
            try:
                # If encoding fails, try to preserve as much as possible
                return content.encode('utf-8', errors='replace').decode('utf-8')
            except:
                # Ultimate fallback
                return str(content)
    
    def preserve_formatting(self, content):
        """Preserve spaces, line breaks, and paragraph formatting."""
        if not content:
            return content
        
        # Preserve all whitespace characters including spaces, tabs, and newlines
        # This ensures that paragraph breaks and indentation are maintained
        try:
            # Test encoding first
            content.encode('utf-8')
            return content
        except UnicodeEncodeError:
            # Fallback with error replacement but preserve whitespace
            return content.encode('utf-8', errors='replace').decode('utf-8')
    
    def sanitize_content_for_api(self, content):
        """Sanitize content for API calls while preserving original text, spaces, and formatting."""
        if not content:
            return content
        
        # First preserve formatting and spaces
        preserved = self.preserve_formatting(content)
        
        # Only validate Unicode, don't remove spaces or formatting
        validated = self.validate_unicode_content(preserved)
        
        # Only remove null bytes that might cause issues in API calls
        if '\x00' in validated:
            validated = validated.replace('\x00', '')
        
        # Ensure line breaks and spaces are preserved
        validated = validated.replace('\r\n', '\n').replace('\r', '\n')
        
        return validated
    
    def _is_base64_image(self, data):
        """Check if the data is a base64 encoded image string."""
        if not isinstance(data, str):
            return False
        
        # Check if it starts with data URL prefix
        if data.startswith('data:image/'):
            return True
        
        # Try to decode as base64
        try:
            # Extract base64 part if it's a data URL
            if ',' in data:
                base64_part = data.split(',')[1]
            else:
                base64_part = data
            
            # Try to decode
            base64.b64decode(base64_part, validate=True)
            return True
        except Exception:
            return False
    
    def _base64_to_bytes(self, base64_string):
        """Convert a base64 encoded string to bytes."""
        try:
            # If it's a data URL, extract the base64 part
            if base64_string.startswith('data:image/'):
                base64_part = base64_string.split(',')[1]
            else:
                base64_part = base64_string
            
            # Decode base64 to bytes
            return base64.b64decode(base64_part, validate=True)
        except Exception as e:
            current_app.logger.error(f"Failed to decode base64 image: {str(e)}")
            raise Exception(f"Failed to decode base64 image: {str(e)}")
    
    def generate_post_content(self, user_id: str) -> tuple:
        """
        Generate post content using AI.
        
        Args:
            user_id (str): User ID for personalization
            
        Returns:
            tuple: (Generated post content, Image URL or None)
        """
        try:
            # Call the Hugging Face model to generate content
            result = self.client.predict(
                code=user_id,
                api_name="/poster_linkedin"
            )
            
            # Parse the result (assuming it returns a list with content as first element)
            # First try to parse as JSON
            try:
                parsed_result = json.loads(result)
            except json.JSONDecodeError:
                # If JSON parsing fails, check if it's already a Python list/object
                try:
                    # Try to evaluate as Python literal (safe for lists/dicts)
                    import ast
                    parsed_result = ast.literal_eval(result)
                except (ValueError, SyntaxError):
                    # If that fails, treat the result as a plain string
                    parsed_result = [result]
            
            # Extract the first element if it's a list
            if isinstance(parsed_result, list):
                generated_content = parsed_result[0] if parsed_result and parsed_result[0] is not None else "Generated content will appear here..."
                # Extract the second element as image URL if it exists
                image_data = parsed_result[1] if len(parsed_result) > 1 and parsed_result[1] is not None else None
            else:
                generated_content = str(parsed_result) if parsed_result is not None else "Generated content will appear here..."
                image_data = None
                
            # Validate, sanitize, and preserve formatting of the generated content
            sanitized_content = self.sanitize_content_for_api(generated_content)
            
            # Ensure paragraph breaks and formatting are preserved
            final_content = self.preserve_formatting(sanitized_content)
            
            # Handle image data - could be URL or base64
            image_bytes = None
            if image_data:
                if self._is_base64_image(image_data):
                    # Convert base64 to bytes for storage
                    image_bytes = self._base64_to_bytes(image_data)
                else:
                    # It's a URL, keep as string
                    image_bytes = image_data
            
            return (final_content, image_bytes)
            
        except Exception as e:
            error_message = str(e)
            current_app.logger.error(f"Content generation failed: {error_message}")
            raise Exception(f"Content generation failed: {error_message}")
    
    def add_rss_source(self, rss_link: str, user_id: str) -> str:
        """
        Add an RSS source for content generation.
        
        Args:
            rss_link (str): RSS feed URL
            user_id (str): User ID
            
        Returns:
            str: Result message
        """
        try:
            # Call the Hugging Face model to add RSS source
            rss_input = f"{rss_link}__thi_irrh'èçs_my_id__! {user_id}"
            sanitized_rss_input = self.sanitize_content_for_api(rss_input)
            
            result = self.client.predict(
                rss_link=sanitized_rss_input,
                api_name="/ajouter_rss"
            )
            
            # Sanitize and preserve formatting of the result
            sanitized_result = self.sanitize_content_for_api(result)
            return self.preserve_formatting(sanitized_result)
            
        except Exception as e:
            raise Exception(f"Failed to add RSS source: {str(e)}")