# NOTE: this file was captured from a Hugging Face Spaces page view. The page
# chrome that preceded the code (Space status "Sleeping", "File size: 6,359 Bytes",
# per-line blame hashes and the line-number gutter) is not part of the program.
import gradio as gr
import spaces
import json
import re
from gradio_client import Client
def get_caption_from_kosmos(image_in):
    """Return a detailed, literal caption for an image using the Kosmos-2 Space.

    Args:
        image_in: Filepath or URL of the image, forwarded to the remote
            'Test Image' component.

    Returns:
        The caption text with the leading "Describe this image in detail:"
        prefix removed. When the prefix is not found, the full reconstructed
        caption is returned instead.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
    kosmos2_result = kosmos2_client.predict(
        image_in,    # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4,
    )
    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    # The second item of the result is a path to a JSON file holding the
    # caption split into fragments.
    with open(kosmos2_result[1], 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Each entry's first element is a text fragment (presumably paired with an
    # entity label -- verify against the Space); joining them rebuilds the caption.
    full_sentence = ' '.join(sublist[0] for sublist in data)

    # Strip the echoed instruction. DOTALL lets the capture span line breaks,
    # otherwise a multi-line caption would never match.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence, flags=re.DOTALL)
    if match:
        description = match.group(1)
        print(description)
    else:
        print("Unable to locate valid description.")
        # Fall back to the raw caption so the caller always gets usable text
        # instead of hitting an unbound local.
        description = full_sentence

    return description
def get_caption_from_MD(image_in):
    """Ask the moondream1 Space to describe the image as a fictional character.

    Args:
        image_in: Filepath of the image, forwarded to the remote 'image'
            component.

    Returns:
        Whatever the Space's "/answer_question" endpoint returns for the
        fixed question below (also printed for logging).
    """
    moondream = Client("https://vikhyatk-moondream1.hf.space/")
    question = "Describe character like if it was fictional"
    answer = moondream.predict(image_in, question, api_name="/answer_question")
    print(answer)
    return answer
import re
import torch
from transformers import pipeline
# Shared text-generation pipeline, built once at import time and reused by
# get_card_idea() for every request. It loads HuggingFaceH4/zephyr-7b-beta
# with torch_dtype=torch.bfloat16 and device_map="auto".
pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")
@spaces.GPU(enable_queue=True)
def get_card_idea(user_prompt):
    """Generate a Magic card from an image description with the Zephyr pipeline.

    The prompt is laid out with Zephyr's chat tags: a system turn carrying the
    card-design instructions plus two worked examples, a user turn carrying
    ``user_prompt``, and an opened assistant turn for the model to complete.

    Args:
        user_prompt: Image description to turn into a card.

    Returns:
        The raw output of ``pipe(...)``; the caller reads
        ``outputs[0]["generated_text"]`` from it.
    """
    agent_maker_sys = '''
You know everything about Magic Cards and Magic Card rules.
Your job is to generate new magic card from an image description.
Example 1:
"The image represents the famous painting "The Mona Lisa" by the Italian artist Leonardo da Vinci. The painting is a portrait of a woman with a distinctive smile, and it is known for its realistic style and the use of the sfumato technique, which creates a soft, smoky effect around the edges of the painting. The painting is displayed in a museum, and it is considered one of the most iconic and recognizable works of art in the world."
Bot Response:
"Mona Lisa's Enigma [2][W][U][B]
Enchantment
At the beginning of your upkeep, add one mana of any color to your mana pool for each color among permanents you control.
[W][U][B]: Target player puts the top X cards of their library into their graveyard, where X is the number of colors among permanents you control.
Her alluring smile masks unfathomable depths.
Mythic"
Example 2:
"The image features a fluffy, white and gray cat sitting on a couch. The cat has a surprised expression on its face, as if it has just heard or seen something unexpected. The cat's position on the couch and its attentive gaze towards the camera give the impression that it is a well-known or famous cat, perhaps a popular pet or a subject in a movie or TV show. However, without more context or information, it is not possible to definitively identify the cat as something famous. »
Bot Response:
"Feline Dominator
[2][G][W]
Creature - Cat
Whenever Feline Dominator attacks, it gets +1/+1 until end of turn for each other attacking Cat you control.
Whenever Feline Dominator deals combat damage, you gain that much life.
The true ruler of the house, demanding tribute from all who enter its domain.
2/4
Uncommon"
'''

    instruction = f"""
<|system|>
{agent_maker_sys}</s>
<|user|>
"""

    # Close the user turn and open the assistant turn. Without the trailing
    # <|assistant|> tag the model has to emit the tag itself before answering,
    # and the caller's cleanup (which strips everything from <|system|> up to
    # <|assistant|>) only works when it happens to do so.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"
    #print(f"PROMPT: {prompt}")

    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    return outputs
def infer(image_in, cap_type):
    """Caption an image and turn the caption into a suggested Magic card.

    Args:
        image_in: Filepath of the reference image (None when nothing was
            provided).
        cap_type: "Fictional" to caption with get_caption_from_MD, or
            "Literal" to caption with get_caption_from_kosmos.

    Returns:
        The generated card text, with the prompt section (everything from
        ``<|system|>`` up to ``<|assistant|>``) removed and leading newlines
        stripped.

    Raises:
        gr.Error: If no image was provided or ``cap_type`` is not one of the
            two supported values.
    """
    # Fail early with a readable message instead of sending None to the
    # remote captioning API.
    if image_in is None:
        raise gr.Error("Please provide an image before generating a card.")

    gr.Info("Getting image description...")
    if cap_type == "Fictional":
        user_prompt = get_caption_from_MD(image_in)
    elif cap_type == "Literal":
        user_prompt = get_caption_from_kosmos(image_in)
    else:
        # Without this branch user_prompt would be unbound below.
        raise gr.Error(f"Unsupported caption type: {cap_type!r}")

    gr.Info("Building a new card according to the image caption ...")
    outputs = get_card_idea(user_prompt)

    # The generated text echoes the prompt; drop it (non-greedy, across lines)
    # so only the model's answer remains.
    pattern = r'<\|system\|>(.*?)<\|assistant\|>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    print(f"SUGGESTED CARD: {cleaned_text}")
    return cleaned_text.lstrip("\n")
# Page title. This must be a plain string: a trailing comma after the literal
# would silently turn it into a 1-tuple.
title = "Magic Card Generator"

# Optional subtitle rendered under the page heading (currently empty).
description = ""

# Custom CSS passed to gr.Blocks: centers the main column, caps its width, and
# keeps example-gallery images from stretching on mobile.
css = """
#col-container{
margin: 0 auto;
max-width: 780px;
text-align: left;
}
/* fix examples gallery width on mobile */
div#component-14 > .gallery > .gallery-item > .container > img {
width: auto!important;
}
"""
# Build the UI: a header, an input column (image + caption type + button) and
# an output column (suggested card), then wire the button to infer().
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Centered heading; the paragraph is filled from `description`.
        header_html = f"""
<h2 style="text-align: center;">Magic Card Generator</h2>
<p style="text-align: center;">{description}</p>
"""
        gr.HTML(header_html)

        with gr.Row():
            # Inputs
            with gr.Column():
                image_in = gr.Image(label="Image reference", type="filepath", elem_id="image-in")
                cap_type = gr.Radio(
                    label="Caption type",
                    choices=["Literal", "Fictional"],
                    value="Fictional",
                )
                submit_btn = gr.Button("Make LLM system from my pic !")

            # Output
            with gr.Column():
                result = gr.Textbox(
                    label="Suggested Card",
                    lines=6,
                    max_lines=30,
                    elem_id="suggested-card",
                )

    # Clicking the button runs the caption -> card pipeline.
    submit_btn.click(fn=infer, inputs=[image_in, cap_type], outputs=[result])

# Enable request queuing, hide the API page and surface errors in the UI.
demo.queue().launch(show_api=False, show_error=True)