fffiloni's picture
Update app.py
e9acd30 verified
import gradio as gr
import spaces
import json
import re
from gradio_client import Client
def get_caption_from_kosmos(image_in):
    """Return a detailed, literal caption of an image via the Kosmos-2 Space.

    The remote Space is called with the "Detailed" description type. The
    second element of its result is treated as the path of a JSON file; the
    first item of every entry in that file is joined with spaces to rebuild
    the sentence, and the leading "Describe this image in detail:" instruction
    is stripped when present.

    Args:
        image_in: Filepath or URL of the image to caption.

    Returns:
        The caption text. If the instruction prefix cannot be found, the whole
        reconstructed sentence is returned instead.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
    kosmos2_result = kosmos2_client.predict(
        image_in,    # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4,
    )
    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    with open(kosmos2_result[1], 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Each entry's first item is a text fragment; stitch them back together.
    full_sentence = ' '.join(sublist[0] for sublist in data)

    # Expected format: "Describe this image in detail:" followed by optional
    # whitespace and then the actual description.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence)
    if match:
        description = match.group(1)
        print(description)
    else:
        # Without this fallback `description` would be unbound and the return
        # below would raise UnboundLocalError.
        print("Unable to locate valid description.")
        description = full_sentence
    return description
def get_caption_from_MD(image_in):
    """Ask the moondream1 Space to describe the image as a fictional character.

    Args:
        image_in: Filepath of the image to describe.

    Returns:
        Whatever the Space's ``/answer_question`` endpoint returns.
    """
    # Value for the Space's 'Question' Textbox component.
    question = "Describe character like if it was fictional"
    moondream_client = Client("https://vikhyatk-moondream1.hf.space/")
    answer = moondream_client.predict(
        image_in,  # filepath in 'image' Image component
        question,
        api_name="/answer_question",
    )
    print(answer)
    return answer
import re
import torch
from transformers import pipeline
# Text-generation pipeline created once at module import; get_card_idea()
# calls it to turn an image caption into a card suggestion.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
@spaces.GPU(enable_queue=True)
def get_card_idea(user_prompt):
    """Generate one magic-card idea from an image description.

    Builds a chat-style prompt (system instructions with two worked examples,
    then the user's image description, then the assistant turn marker) and
    runs it through the module-level text-generation pipeline ``pipe``.

    Args:
        user_prompt: Text description of the image.

    Returns:
        The raw output of ``pipe`` for the assembled prompt.
    """
    # System instructions with two few-shot examples. Kept flush-left so the
    # text sent to the model carries no stray indentation.
    agent_maker_sys = '''
Your job is to generate new magic card from an image description given by user.
You will only provide one card idea.
Example 1:
"The image represents the famous painting "The Mona Lisa" by the Italian artist Leonardo da Vinci. The painting is a portrait of a woman with a distinctive smile, and it is known for its realistic style and the use of the sfumato technique, which creates a soft, smoky effect around the edges of the painting. The painting is displayed in a museum, and it is considered one of the most iconic and recognizable works of art in the world."
Bot Response:
"Mona Lisa's Enigma [2][W][U][B]
Enchantment
At the beginning of your upkeep, add one mana of any color to your mana pool for each color among permanents you control.
[W][U][B]: Target player puts the top X cards of their library into their graveyard, where X is the number of colors among permanents you control.
Her alluring smile masks unfathomable depths.
Mythic"
Example 2:
"The image features a fluffy, white and gray cat sitting on a couch. The cat has a surprised expression on its face, as if it has just heard or seen something unexpected. The cat's position on the couch and its attentive gaze towards the camera give the impression that it is a well-known or famous cat, perhaps a popular pet or a subject in a movie or TV show. However, without more context or information, it is not possible to definitively identify the cat as something famous."
Bot Response:
"Feline Dominator
[2][G][W]
Creature - Cat
Whenever Feline Dominator attacks, it gets +1/+1 until end of turn for each other attacking Cat you control.
Whenever Feline Dominator deals combat damage, you gain that much life.
The true ruler of the house, demanding tribute from all who enter its domain.
2/4
Uncommon"
Only provide one card example according to image description.
'''
    # End the prompt with the assistant marker so the model answers directly
    # instead of having to emit the marker itself; callers that strip
    # everything up to "<|assistant|>" then always find it.
    prompt = (
        f"<|system|>\n{agent_maker_sys}</s>\n"
        f"<|user|>\n{user_prompt}</s>\n"
        "<|assistant|>\n"
    )
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    return outputs
def infer(image_in, cap_type):
    """Produce a card suggestion for an image.

    Captions the image with the backend selected by ``cap_type``, feeds the
    caption to ``get_card_idea`` and removes the echoed prompt (everything
    from ``<|system|>`` up to ``<|assistant|>``) from the generated text.

    Args:
        image_in: Filepath of the reference image.
        cap_type: ``"Fictional"`` or ``"Literal"``; selects the captioner.

    Returns:
        The suggested card text with leading newlines removed.

    Raises:
        gr.Error: If no image was given or ``cap_type`` is not recognised.
    """
    if image_in is None:
        raise gr.Error("Please provide an image first.")

    gr.Info("Getting image description...")
    if cap_type == "Fictional":
        user_prompt = get_caption_from_MD(image_in)
    elif cap_type == "Literal":
        user_prompt = get_caption_from_kosmos(image_in)
    else:
        # Without this branch `user_prompt` would be unbound below.
        raise gr.Error(f"Unknown caption type: {cap_type!r}")

    gr.Info("Building a new card according to the image caption ...")
    outputs = get_card_idea(user_prompt)

    # Strip the prompt that the pipeline echoes back ahead of the answer.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)
    print(f"SUGGESTED CARD: {cleaned_text}")
    return cleaned_text.lstrip("\n")
# Page title. A trailing comma here would turn it into a 1-tuple, so it is a
# plain string.
title = "Magic Card Generator"

# Text interpolated into the paragraph under the page heading; empty for now.
description = ""

# Custom stylesheet handed to gr.Blocks(css=...).
css = """
#col-container{
margin: 0 auto;
max-width: 780px;
text-align: left;
}
/* fix examples gallery width on mobile */
div#component-14 > .gallery > .gallery-item > .container > img {
width: auto!important;
}
"""
# Build the UI: a centred container holding a heading, an input column
# (image, caption type, button, examples) and an output column (card text).
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
<h2 style="text-align: center;">Magic Card Generator</h2>
<p style="text-align: center;">{description}</p>
""")
        with gr.Row():
            # Inputs.
            with gr.Column():
                image_in = gr.Image(label="Image reference", type="filepath", elem_id="image-in")
                cap_type = gr.Radio(
                    label="Caption type",
                    choices=["Literal", "Fictional"],
                    value="Fictional",
                )
                submit_btn = gr.Button("Make a card from my pic !")
                gr.Examples(
                    examples=[
                        ["examples/monalisa.png"],
                        ["examples/violonist.png"],
                        ["examples/frog.jpeg"],
                        ["examples/samourai.png"],
                    ],
                    fn=infer,
                    inputs=[image_in, cap_type],
                )
            # Output.
            with gr.Column():
                result = gr.Textbox(
                    label="Suggested Card",
                    lines=6,
                    max_lines=30,
                    elem_id="suggested-card",
                )
    # Clicking the button runs the full caption -> card pipeline.
    submit_btn.click(fn=infer, inputs=[image_in, cap_type], outputs=[result])

demo.queue().launch(show_api=False, show_error=True)