Spaces:

Zelyanoth
/

Lin

Runtime error

Lin/backend/services/content_service.py

35c65a3 4 minutes ago

7.97 kB

	import re
	import json
	import unicodedata
	import io
	from flask import current_app
	from gradio_client import Client
	import pandas as pd
	from PIL import Image
	import base64

	class ContentService:
	    """Service for AI content generation using Hugging Face models.

	    Wraps a ``gradio_client.Client`` bound to the
	    ``Zelyanoth/Linkedin_poster_dev`` Space and exposes helpers to generate
	    post content, register RSS sources, and clean up text and image
	    payloads exchanged with that Space.
	    """

	    # Text used when the model returns no usable content.
	    _PLACEHOLDER_CONTENT = "Generated content will appear here..."

	    # Leading bytes of the raster formats recognised when a string is not a
	    # ``data:image/`` URL. WEBP is checked separately because its marker
	    # sits at offset 8 inside a RIFF container.
	    _IMAGE_SIGNATURES = (
	        b"\x89PNG\r\n\x1a\n",  # PNG
	        b"\xff\xd8\xff",  # JPEG
	        b"GIF87a",
	        b"GIF89a",
	        b"BM",  # BMP
	        b"II*\x00",  # TIFF, little-endian
	        b"MM\x00*",  # TIFF, big-endian
	        b"\x00\x00\x01\x00",  # ICO
	    )

	    def __init__(self, hugging_key=None):
	        """Create the service and its Gradio client.

	        Args:
	            hugging_key: Hugging Face token. When falsy, the value of
	                ``HUGGING_KEY`` in the current Flask app config is used,
	                which requires an active application context.
	        """
	        # Use provided key or fall back to app config
	        self.hugging_key = hugging_key or current_app.config.get('HUGGING_KEY')
	        # Initialize the Gradio client for content generation
	        self.client = Client("Zelyanoth/Linkedin_poster_dev", hf_token=self.hugging_key)

	    @staticmethod
	    def _ensure_utf8(text):
	        """Return *text* unchanged if it encodes as UTF-8, else a repaired copy.

	        Characters that cannot be encoded (e.g. lone surrogates) are replaced
	        with ``?``; every other character, including whitespace, is kept.
	        """
	        try:
	            text.encode('utf-8')
	        except UnicodeEncodeError:
	            return text.encode('utf-8', errors='replace').decode('utf-8')
	        return text

	    def validate_unicode_content(self, content):
	        """Validate Unicode content while preserving original formatting and spaces.

	        Args:
	            content: Value to validate. Falsy or non-string values are
	                returned untouched.

	        Returns:
	            The original string when it is valid UTF-8, otherwise a copy with
	            unencodable characters replaced by ``?``.
	        """
	        if not content or not isinstance(content, str):
	            return content
	        return self._ensure_utf8(content)

	    def preserve_formatting(self, content):
	        """Preserve spaces, line breaks, and paragraph formatting.

	        No whitespace is altered; the only possible change is the replacement
	        of characters that cannot be encoded as UTF-8.

	        Args:
	            content: Value to process. Falsy or non-string values are
	                returned untouched (same guard as
	                ``validate_unicode_content``).

	        Returns:
	            The content with all whitespace intact.
	        """
	        if not content or not isinstance(content, str):
	            return content
	        return self._ensure_utf8(content)

	    def sanitize_content_for_api(self, content):
	        """Sanitize content for API calls while preserving text, spaces, and formatting.

	        Args:
	            content: Value to sanitize. Falsy or non-string values are
	                returned untouched.

	        Returns:
	            The content with unencodable characters replaced, NUL bytes
	            removed, and ``\\r\\n`` / ``\\r`` line endings normalised to
	            ``\\n``.
	        """
	        if not content or not isinstance(content, str):
	            return content

	        # Go through the public hooks so subclasses overriding them still apply.
	        preserved = self.preserve_formatting(content)
	        validated = self.validate_unicode_content(preserved)

	        # Only remove null bytes that might cause issues in API calls
	        validated = validated.replace('\x00', '')

	        # Normalise line endings; '\r\n' must be handled before a bare '\r'.
	        return validated.replace('\r\n', '\n').replace('\r', '\n')

	    @classmethod
	    def _has_image_signature(cls, raw):
	        """Return True when *raw* bytes start with a known image signature."""
	        if raw.startswith(cls._IMAGE_SIGNATURES):
	            return True
	        return raw[:4] == b"RIFF" and raw[8:12] == b"WEBP"

	    def _is_base64_image(self, data):
	        """Check if the data is a base64 encoded image string.

	        A ``data:image/`` URL is accepted as-is. Any other string must be
	        strictly valid base64 *and* decode to bytes that begin with a known
	        image signature; being decodable alone is not enough, because short
	        paths or words (``/tmp/abc``, ``None``) are valid base64 too.

	        Args:
	            data: Candidate value.

	        Returns:
	            bool: True for a data URL or a base64-encoded image, else False.
	        """
	        if not isinstance(data, str) or not data:
	            return False

	        # Check if it starts with data URL prefix
	        if data.startswith('data:image/'):
	            return True

	        try:
	            raw = base64.b64decode(data, validate=True)
	        except ValueError:
	            # binascii.Error (bad alphabet/padding) is a ValueError subclass,
	            # as is the error raised for non-ASCII input.
	            return False
	        return self._has_image_signature(raw)

	    def _base64_to_bytes(self, base64_string):
	        """Convert a base64 encoded string to bytes.

	        Args:
	            base64_string: Raw base64 text or a ``data:image/...`` URL.

	        Returns:
	            bytes: The decoded payload.

	        Raises:
	            Exception: If the input cannot be decoded. The failure is logged
	                through the Flask app logger and the original error is
	                attached as the cause.
	        """
	        try:
	            if base64_string.startswith('data:image/'):
	                # The payload is everything after the first comma.
	                _, separator, payload = base64_string.partition(',')
	                if not separator:
	                    raise ValueError("data URL has no ',' separator")
	            else:
	                payload = base64_string

	            return base64.b64decode(payload, validate=True)
	        except Exception as e:
	            current_app.logger.error(f"Failed to decode base64 image: {str(e)}")
	            raise Exception(f"Failed to decode base64 image: {str(e)}") from e

	    @staticmethod
	    def _parse_model_output(result):
	        """Turn the raw model output into a Python value.

	        Non-string values are returned as-is (``json.loads`` would raise
	        ``TypeError`` on them). Strings are tried as JSON, then as a Python
	        literal, and finally wrapped in a one-element list as plain text.
	        """
	        if not isinstance(result, str):
	            return result

	        try:
	            return json.loads(result)
	        except json.JSONDecodeError:
	            pass

	        try:
	            # literal_eval only builds literals; it never executes code.
	            import ast
	            return ast.literal_eval(result)
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	            return [result]

	    @classmethod
	    def _split_generation_result(cls, result):
	        """Return ``(content, image_data)`` extracted from the model output.

	        For a list/tuple the first element is the content and the second, if
	        present, the image data. Anything else is treated as content only.
	        Missing or ``None`` content is replaced by the placeholder text, and
	        the content is always returned as ``str``.
	        """
	        parsed = cls._parse_model_output(result)

	        if isinstance(parsed, (list, tuple)):
	            content = parsed[0] if parsed and parsed[0] is not None else cls._PLACEHOLDER_CONTENT
	            image_data = parsed[1] if len(parsed) > 1 and parsed[1] is not None else None
	        else:
	            content = parsed if parsed is not None else cls._PLACEHOLDER_CONTENT
	            image_data = None

	        return str(content), image_data

	    def generate_post_content(self, user_id: str) -> tuple:
	        """
	        Generate post content using AI.

	        Args:
	            user_id (str): User ID for personalization

	        Returns:
	            tuple: (Generated post content, image). The image is ``bytes``
	            when the model returned base64 image data, the value returned by
	            the model unchanged otherwise (presumably a URL or path), or
	            ``None`` when there is no image.

	        Raises:
	            Exception: If the remote call or result handling fails. The
	                failure is logged through the Flask app logger and the
	                original error is attached as the cause.
	        """
	        try:
	            # Call the Hugging Face model to generate content
	            result = self.client.predict(
	                code=user_id,
	                api_name="/poster_linkedin"
	            )

	            generated_content, image_data = self._split_generation_result(result)

	            # Validate and sanitize while keeping paragraph breaks intact.
	            final_content = self.sanitize_content_for_api(generated_content)

	            # Handle image data - could be URL or base64
	            image_payload = None
	            if image_data:
	                if self._is_base64_image(image_data):
	                    # Convert base64 to bytes for storage
	                    image_payload = self._base64_to_bytes(image_data)
	                else:
	                    # Not base64: pass the value through unchanged
	                    image_payload = image_data

	            return (final_content, image_payload)

	        except Exception as e:
	            error_message = str(e)
	            current_app.logger.error(f"Content generation failed: {error_message}")
	            raise Exception(f"Content generation failed: {error_message}") from e

	    def add_rss_source(self, rss_link: str, user_id: str) -> str:
	        """
	        Add an RSS source for content generation.

	        Args:
	            rss_link (str): RSS feed URL
	            user_id (str): User ID

	        Returns:
	            str: Result message returned by the Space, sanitized.

	        Raises:
	            Exception: If the remote call fails; the original error is
	                attached as the cause.
	        """
	        try:
	            # The link and user id travel in one string joined by a fixed
	            # marker; presumably the Space splits on it, so it must not change.
	            rss_input = f"{rss_link}__thi_irrh'èçs_my_id__! {user_id}"
	            sanitized_rss_input = self.sanitize_content_for_api(rss_input)

	            result = self.client.predict(
	                rss_link=sanitized_rss_input,
	                api_name="/ajouter_rss"
	            )

	            # Sanitizing already preserves whitespace and formatting.
	            return self.sanitize_content_for_api(result)

	        except Exception as e:
	            raise Exception(f"Failed to add RSS source: {str(e)}") from e