Spaces:
Sleeping
Sleeping
import random # Import the random module | |
from groq import Groq | |
from openai import OpenAI | |
import os | |
import io | |
import base64 | |
from huggingface_hub import InferenceApi | |
class PromptClass:
    """Build text-to-image prompts and chat replies through hosted LLM providers.

    Three providers are supported, selected by name: "Hugging Face", "Groq"
    and "SambaNova". Credentials are read from the ``HF_TOKEN``,
    ``GROQ_TOKEN`` and ``SAMBANOVA_TOKEN`` environment variables.
    """

    # Instruction templates sent to the LLM. They are constants, so they are
    # defined once on the class instead of being rebuilt on every call.
    DEFAULT_LONG_PROMPT = """Create a detailed visually descriptive caption of this description,
which will be used as a prompt for a text to image AI system (caption only, no instructions like "create an image").
Remove any mention of digital artwork or artwork style. Give detailed visual descriptions of the character(s), including ethnicity, skin tone, expression etc.
Imagine using keywords for a still for someone who has aphantasia. Describe the image style, e.g., any photographic or art styles/techniques utilized.
Make sure to fully describe all aspects of the cinematography, with abundant technical details and visual descriptions.
If there is more than one image, combine the elements and characters from all of the images creatively into a single
cohesive composition with a single background, inventing an interaction between the characters.
Be creative in combining the characters into a single cohesive scene.
Focus on two primary characters (or one) and describe an interesting interaction between them, such as a hug, a kiss, a fight, giving an object,
an emotional reaction/interaction. If there is more than one background in the images, pick the most appropriate one.
Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph.
If you feel the images are inappropriate, invent a new scene/characters inspired by these.
Additionally, incorporate a specific movie director's visual style and describe the lighting setup in detail,
including the type, color, and placement of light sources to create the desired mood and atmosphere.
Always frame the scene, including details about the film grain, color grading, and any artifacts or characteristics specific."""

    DEFAULT_SIMPLE_PROMPT = """Create a brief, straightforward caption for this description, suitable for a text-to-image AI system.
Focus on the main elements, key characters, and overall scene without elaborate details.
Provide a clear and concise description in one or two sentences. Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph."""

    POSTER_PROMPT = """Analyze the provided description and extract key information to create a movie poster style description. Format the output as follows:
Title: A catchy, intriguing title that captures the essence of the scene, place the title in "".
Main character: Give a description of the main character.
Background: Describe the background in detail.
Supporting characters: Describe the supporting characters.
Branding type: Describe the branding type.
Tagline: Include a tagline that captures the essence of the movie.
Visual style: Ensure that the visual style fits the branding type and tagline.
You are allowed to make up film and branding names, and do them like 80's, 90's or modern movie posters.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    ONLY_OBJECTS_PROMPT = """Create a highly detailed and visually rich description focusing solely on inanimate objects,
without including any human or animal figures. Describe the objects' shapes, sizes, colors, textures, and materials in great detail.
Pay attention to their arrangement, positioning, and how they interact with light and shadow. Include information about the setting
or environment these objects are in, such as indoor/outdoor, time of day, weather conditions, and any atmospheric effects.
Mention any unique features, patterns, or imperfections on the objects. Describe the overall composition, perspective, and
any artistic techniques that might be employed to render these objects (e.g., photorealism, impressionistic style, etc.).
Your description should paint a vivid picture that allows someone to imagine the scene without seeing it, focusing on the beauty,
complexity, or significance of everyday objects. Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph."""

    NO_FIGURE_PROMPT = """Generate a comprehensive and visually evocative description of a scene
or landscape without including any human or animal figures. Focus on the environment, natural elements, and man-made structures if present.
Describe the topography, vegetation, weather conditions, and time of day in great detail.
Pay attention to colors, textures, and how light interacts with different elements of the scene.
If there are buildings or other structures, describe their architecture, condition, and how they fit into the landscape.
Include sensory details beyond just visual elements - mention sounds, smells, and the overall atmosphere or mood of the scene.
Describe any notable features like bodies of water, geological formations, or sky phenomena.
Consider the perspective from which the scene is viewed and how this affects the composition.
Your description should transport the reader to this location, allowing them to vividly imagine the scene without any living subjects present.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    LANDSCAPE_PROMPT = """Create an immersive and detailed description of a landscape,
focusing on its natural beauty and geographical features.
Begin with the overall topography - is it mountainous, coastal, forested, desert, or a combination?
Describe the horizon and how land meets sky. Detail the vegetation, noting types of trees, flowers, or grass,
and how they're distributed across the landscape. Include information about any water features -
rivers, lakes, oceans - and how they interact with the land. Describe the sky, including cloud formations,
color gradients, and any celestial bodies visible.
Pay attention to the quality of light, time of day, and season, explaining how these factors affect the colors and shadows in the scene.
Include details about weather conditions and how they impact the landscape.
Mention any geological features like rock formations, cliffs, or unique land patterns.
If there are any distant man-made elements, describe how they integrate with the natural setting.
Your description should capture the grandeur and mood of the landscape,
allowing the reader to feel as if they're standing within this awe-inspiring natural scene.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    FANTASY_PROMPT = """Craft an extraordinarily detailed and imaginative description of a fantasy scene,
blending elements of magic, otherworldly creatures, and fantastical environments. Begin by setting the overall tone -
is this a dark and foreboding realm, a whimsical fairytale setting, or an epic high-fantasy world?
Describe the landscape, including any impossible or magical geographical features like floating islands,
crystal forests, or rivers of starlight. Detail the flora and fauna,
focusing on fantastical plants and creatures that don't exist in our world.
Include descriptions of any structures or ruins, emphasizing their otherworldly architecture and magical properties.
Describe the sky and any celestial bodies, considering how they might differ from our reality.
Include details about the presence of magic - how it manifests visually,
its effects on the environment, and any magical phenomena occurring in the scene.
If there are characters present, describe their appearance, focusing on non-human features, magical auras, or
fantastical clothing and accessories. Pay attention to colors, textures, and light sources,
especially those that couldn't exist in the real world. Your description should transport the
reader to a realm of pure imagination, where the laws of physics and nature as we know them don't apply.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    # Maps the prompt-type label to its template. Insertion order matters:
    # the "Random" prompt type picks from this list with the global RNG.
    # NOTE(review): "Medium" maps to the poster template — confirm intended.
    PROMPT_TYPES = {
        "Long": DEFAULT_LONG_PROMPT,
        "Short": DEFAULT_SIMPLE_PROMPT,
        "Medium": POSTER_PROMPT,
        "OnlyObjects": ONLY_OBJECTS_PROMPT,
        "NoFigure": NO_FIGURE_PROMPT,
        "Landscape": LANDSCAPE_PROMPT,
        "Fantasy": FANTASY_PROMPT,
    }

    # provider name -> (client attribute, default model, extra request kwargs)
    _PROVIDERS = {
        "Hugging Face": (
            "huggingface_client",
            "meta-llama/Meta-Llama-3.1-70B-Instruct",
            {"top_p": 0.95},
        ),
        "Groq": ("groq_client", "llama-3.1-70b-versatile", {}),
        "SambaNova": ("sambanova_client", "Meta-Llama-3.1-70B-Instruct", {}),
    }

    _CHAT_SYSTEM_MESSAGE = "You are a helpful assistant"

    def __init__(self):
        """Read API tokens from the environment, build clients, fetch files."""
        self.huggingface_token = os.environ.get("HF_TOKEN")
        self.groq_api_key = os.environ.get("GROQ_TOKEN")
        self.sambanova_api_key = os.environ.get("SAMBANOVA_TOKEN")
        # The API key is intentionally NOT printed: secrets must never be
        # written to stdout/log output.
        self.huggingface_client = OpenAI(
            base_url="https://api-inference.huggingface.co/v1/",
            api_key=self.huggingface_token,
        )
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.sambanova_client = OpenAI(
            api_key=self.sambanova_api_key,
            base_url="https://api.sambanova.ai/v1",
        )
        self.download_models()

    def download_models(self):
        """Download one file from each Stable Diffusion 3.5 repo into ./models.

        NOTE(review): the files fetched are an image and a license, not model
        weights — presumably a gated-access check; confirm the intent.
        """
        from huggingface_hub import hf_hub_download

        hf_hub_download(
            repo_id="stabilityai/stable-diffusion-3.5-large",
            filename="mmdit.png",
            local_dir="./models",
            token=self.huggingface_token,
        )
        hf_hub_download(
            repo_id="stabilityai/stable-diffusion-3.5-large-turbo",
            filename="LICENSE.md",
            local_dir="./models",
            token=self.huggingface_token,
        )

    def generate_prompt(self, dynamic_seed, prompt_type, custom_input):
        """Return the user's custom input, or a placeholder request.

        Args:
            dynamic_seed: Value used to seed the module-level ``random``
                generator (a global side effect that later calls rely on).
            prompt_type: Label interpolated into the placeholder text.
            custom_input: User text; returned unchanged when it contains any
                non-whitespace character.

        Returns:
            ``custom_input`` as given, or the string
            ``"Create a random prompt based on the '<prompt_type>' type."``.
        """
        random.seed(dynamic_seed)
        if custom_input and custom_input.strip():
            prompt = custom_input
        else:
            prompt = f"Create a random prompt based on the '{prompt_type}' type."
        print(f"Generated prompt: {prompt}")  # Debug statement
        return prompt

    @staticmethod
    def _clean_output(output):
        """Strip a leading label or "Here ..." preamble from a model reply.

        NOTE(review): everything before the FIRST ": " anywhere in the text is
        discarded, which can also truncate legitimate content. Behavior kept
        as-is for backward compatibility.
        """
        if ": " in output:
            return output.split(": ", 1)[1].strip()
        if output.lower().startswith("here"):
            sentences = output.split(". ")
            if len(sentences) > 1:
                return ". ".join(sentences[1:]).strip()
        return output

    @classmethod
    def _as_messages(cls, input_text):
        """Normalize chat input into a list of role/content message dicts.

        A non-empty list/tuple of dicts that all carry a "role" key is treated
        as a ready-made conversation and passed through; anything else is
        wrapped as a single user turn behind the default system message.
        """
        if (
            isinstance(input_text, (list, tuple))
            and input_text
            and all(isinstance(m, dict) and "role" in m for m in input_text)
        ):
            return list(input_text)
        return [
            {"role": "system", "content": cls._CHAT_SYSTEM_MESSAGE},
            {"role": "user", "content": input_text},
        ]

    def _complete(self, provider, model, messages, seed):
        """Run one chat completion on ``provider`` and return the stripped text.

        Raises:
            ValueError: If ``provider`` is not one of the supported names.
        """
        # Validate the provider before touching any client attribute.
        if provider not in self._PROVIDERS:
            raise ValueError(f"Unsupported provider: {provider}")
        client_attr, default_model, extra = self._PROVIDERS[provider]
        client = getattr(self, client_attr)
        response = client.chat.completions.create(
            model=model or default_model,
            max_tokens=1024,
            temperature=1.0,
            messages=messages,
            seed=seed,
            **extra,
        )
        # Guard against a reply whose content is None.
        content = response.choices[0].message.content
        return (content or "").strip()

    def generate(
        self,
        input_text,
        long_talk,
        compress,
        compression_level,
        poster,
        prompt_type,
        custom_base_prompt="",
        provider="Hugging Face",
        model=None,
    ):
        """Turn a description into an image-generation caption via an LLM.

        Args:
            input_text: Description to caption. Text starting with
                "Create a random prompt based on" requests a random caption.
            long_talk: Selects the larger character limits when compressing.
            compress: Whether to append a length limit to the instructions.
            compression_level: "soft", "medium" or "hard"; unknown values
                fall back to a 200-character limit.
            poster: When true, compression is skipped.
            prompt_type: Key of ``PROMPT_TYPES``, or "Random".
            custom_base_prompt: Instructions used when ``prompt_type`` is not
                a known key.
            provider: "Hugging Face", "Groq" or "SambaNova".
            model: Model id; defaults to the provider's default model.

        Returns:
            The cleaned caption. On any failure an error message string is
            returned instead of raising, as callers display the result.
        """
        try:
            input_text = input_text or ""

            # Determine the base prompt
            print(f"Received prompt_type: '{prompt_type}'")  # Debug print
            if prompt_type == "Random":
                prompt_type = random.choice(list(self.PROMPT_TYPES))
                print(f"Randomly selected prompt type: {prompt_type}")

            if prompt_type in self.PROMPT_TYPES:
                base_prompt = self.PROMPT_TYPES[prompt_type]
                print(f"Using {prompt_type} prompt")
            elif (custom_base_prompt or "").strip():
                base_prompt = custom_base_prompt
                print("Using custom base prompt")
            else:
                base_prompt = self.DEFAULT_LONG_PROMPT
                print(f"Warning: Unknown or empty prompt type '{prompt_type}'. Using default long prompt.")

            # Handle compression if applicable
            if compress and not poster:
                compression_chars = {
                    "soft": 600 if long_talk else 300,
                    "medium": 400 if long_talk else 200,
                    "hard": 200 if long_talk else 100,
                }
                char_limit = compression_chars.get(compression_level, 200)
                base_prompt += f" Compress the output to be concise while retaining key visual details. MAX OUTPUT SIZE no more than {char_limit} characters."

            # Construct messages for the LLM
            system_message = "You are a helpful assistant. Try your best to give the best response possible to the user."
            if input_text.startswith("Create a random prompt based on"):
                user_message = f"Create a random description based on this\nInstructions: {base_prompt}"
            else:
                user_message = f"{base_prompt}\nDescription: {input_text}"

            # Generate a random seed
            seed = random.randint(0, 10000)
            print(f"Generated seed: {seed}")  # Debug print

            output = self._complete(
                provider,
                model,
                [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message},
                ],
                seed,
            )
            return self._clean_output(output)
        except Exception as e:
            # UI boundary: report the failure as text rather than crashing.
            print(f"An error occurred: {e}")
            return f"Error occurred while processing the request: {str(e)}"

    def chat(self, provider="Hugging Face", model=None, input_text=None):
        """Send ``input_text`` to ``provider`` and return the cleaned reply.

        Args:
            provider: "Hugging Face", "Groq" or "SambaNova".
            model: Model id; defaults to the provider's default model.
            input_text: A user message string, or a list of role/content
                message dicts that is forwarded as the whole conversation.

        Returns:
            The cleaned reply, or "" when there is no input to send.

        Raises:
            ValueError: If ``provider`` is not supported.
        """
        seed = random.randint(0, 10000)
        # Nothing to send: return an empty reply instead of failing on an
        # unbound result or forwarding None to the provider.
        if not input_text or (isinstance(input_text, str) and not input_text.strip()):
            return ""
        output = self._complete(provider, model, self._as_messages(input_text), seed)
        return self._clean_output(output)

    def img2text(self, image=None):
        """Caption an image with the BLIP image-captioning model.

        Args:
            image: Image object exposing ``save`` (and, for non-JPEG modes,
                ``mode``/``convert``) — presumably a ``PIL.Image.Image``.

        Returns:
            The generated caption, or ``None`` when no image is given.
        """
        if not image:
            return None
        inference = InferenceApi(
            repo_id="Salesforce/blip-image-captioning-base",
            token=self.huggingface_token,
        )
        # JPEG cannot store alpha or palette modes, so convert those first.
        if getattr(image, "mode", "RGB") not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")
        # Serialize the image to JPEG bytes in memory
        image_bytes = io.BytesIO()
        image.save(image_bytes, format="JPEG")
        image_base64 = base64.b64encode(image_bytes.getvalue()).decode("utf-8")
        # Send the API request
        response = inference(inputs={"image": image_base64})
        return response[0]["generated_text"]