|
import openai |
|
from google.cloud import vision |
|
from google.oauth2 import service_account |
|
import io |
|
import google.generativeai as genai |
|
from diffusers import AutoPipelineForText2Image |
|
import torch |
|
import os |
|
import spaces |
|
|
|
|
|
|
|
def detect_text_in_image(image_path, credentials):
    """Run Google Cloud Vision text detection on an image file.

    Args:
        image_path: Path of the image file; it is read in binary mode.
        credentials: Credentials object handed to
            ``vision.ImageAnnotatorClient``.

    Returns:
        The ``description`` of the first text annotation in the response,
        or ``''`` when the response carries no text annotations.

    Raises:
        Exception: If the response reports an error; the exception message
            is the error message from the response.
    """
    annotator = vision.ImageAnnotatorClient(credentials=credentials)

    # Wrap the raw file bytes in the request object the Vision client expects.
    with io.open(image_path, 'rb') as handle:
        payload = vision.Image(content=handle.read())

    result = annotator.text_detection(image=payload)

    # The API reports failures inside the response object instead of raising.
    error_message = result.error.message
    if error_message:
        raise Exception(f'{error_message}')

    annotations = result.text_annotations
    if not annotations:
        return ''
    return annotations[0].description
|
|
|
|
|
|
|
|
|
def summarize_diary_text(text, api_key):
    """Summarize a diary entry with the OpenAI chat completions API.

    Args:
        text: Diary text to summarize.
        api_key: OpenAI API key used to construct the client.

    Returns:
        The message content of the first completion choice, or ``''`` when
        ``text`` is empty, ``None`` or whitespace-only (no request is sent
        in that case).
    """
    # Nothing to summarize (e.g. no text was detected in the image): skip the
    # API call instead of asking the model to summarize an empty entry.
    if not text or not text.strip():
        return ''

    client = openai.Client(api_key=api_key)

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following diary entry: {text}"},
        ],
        max_tokens=150,  # caps the length of the generated summary
        temperature=0.7,
        n=1,
    )

    return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_writer_image(image_path, api_key):
    """Ask Gemini for a very short description of the person in an image.

    Args:
        image_path: Path of the image file to upload.
        api_key: API key passed to ``genai.configure``.

    Returns:
        The ``text`` of the model's response.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")

    uploaded = genai.upload_file(image_path)
    try:
        result = model.generate_content(
            [uploaded, "\n\n", "Can you give a very short description of the person in the image?"]
        )
        return result.text
    finally:
        # The upload is only needed for this one request; remove it so photos
        # of people do not linger in remote file storage. Cleanup is
        # best-effort: a failed delete must not mask the result or the
        # original error, so it is reported and otherwise ignored.
        try:
            genai.delete_file(uploaded.name)
        except Exception as exc:
            print(f"Warning: could not delete uploaded file {uploaded.name}: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU
def generate_comic_book(diary_text, writer_description, num_pages=4,
                        num_inference_steps=4, guidance_scale=0.0):
    """Generate comic-style page images from a diary text with SDXL-Turbo.

    The diary text is split on ``'.'``; each non-empty segment becomes one
    scene, and at most ``num_pages`` scenes are rendered. Each page is saved
    as ``comic_book/page_<n>.png`` (the directory is created if missing) and
    progress is printed to stdout.

    Args:
        diary_text: Diary text (or summary) to illustrate.
        writer_description: Description of the writer, inserted into every
            prompt as the actor description.
        num_pages: Maximum number of pages to generate.
        num_inference_steps: Denoising steps per image. Defaults to 4;
            SDXL-Turbo is a few-step model (see its model card).
        guidance_scale: Classifier-free guidance scale. Defaults to 0.0
            because the SDXL-Turbo model card states the model was trained
            without guidance and it should be disabled.

    Returns:
        None.
    """
    # NOTE(review): `spaces.GPU` is presumably the Hugging Face ZeroGPU
    # decorator; the pipeline is loaded on every call - confirm this is the
    # intended placement for the deployment target.
    pipe = AutoPipelineForText2Image.from_pretrained(
        "stabilityai/sdxl-turbo",
        torch_dtype=torch.float16,
        variant="fp16",
        cache_dir="./SDXL-Turbo",
    )
    pipe.to('cuda')

    os.makedirs("comic_book", exist_ok=True)

    # Drop blank segments (e.g. the empty string after a trailing period)
    # BEFORE limiting to num_pages, so no page is spent on an empty scene.
    stripped_segments = (segment.strip() for segment in diary_text.split('.'))
    diary_scenes = [scene for scene in stripped_segments if scene][:num_pages]

    for page_number, scene in enumerate(diary_scenes, start=1):
        prompt = (f'Comic Book Style: \n'
                  f'Actor Description: {writer_description} \n'
                  f'Diary Scene: {scene}\n'
                  f'Generate an cartoon image to represent this diary scene.')

        print(f"Generating comic page {page_number} with prompt:\n{prompt}\n")

        image = pipe(
            prompt=prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        ).images[0]

        image_path = f"comic_book/page_{page_number}.png"
        image.save(image_path)
        print(f"Page {page_number} saved as {image_path}")

    print("Comic book generation complete!")
|
|
|
|