File size: 6,821 Bytes
cd87c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec4292
25edbdd
f88166d
1c71e6b
34e2ac8
a30da41
34e2ac8
 
 
2e0ecb2
34e2ac8
 
2e0ecb2
34e2ac8
 
 
 
 
 
 
 
2e0ecb2
34e2ac8
 
 
 
 
 
f88166d
e9acd30
25edbdd
cd87c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec4292
 
cd87c75
 
 
 
 
8ec4292
cd87c75
8ec4292
cd87c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec4292
cd87c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b9824c
1c71e6b
 
 
 
 
 
 
 
 
 
 
cd87c75
 
9082963
cd87c75
 
9082963
cd87c75
1c71e6b
cd87c75
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import gradio as gr
import spaces
import json
import re
from gradio_client import Client


def get_caption_from_kosmos(image_in):
    """Return a literal caption for an image using the hosted Kosmos-2 Space.

    The image is sent to the remote Space with the "Detailed" description
    type. The second element of the result is treated as the path of a JSON
    file; the first item of every entry in that file is joined with spaces to
    rebuild the caption, and the echoed "Describe this image in detail:"
    prefix is stripped.

    Args:
        image_in: File path or URL of the image to caption.

    Returns:
        The caption text. When the expected prefix is not found, the whole
        reconstructed sentence is returned instead of failing.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")

    kosmos2_result = kosmos2_client.predict(
        image_in,  # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4
    )

    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    with open(kosmos2_result[1], 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Each entry's first item is a text fragment; join them into one sentence.
    full_sentence = ' '.join(sublist[0] for sublist in data)

    # Expected format: "Describe this image in detail:" followed by optional
    # whitespace and then the actual description.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence)
    if match:
        description = match.group(1)
        print(description)
    else:
        # Previously this path left `description` unbound and the return
        # below raised UnboundLocalError; fall back to the raw sentence.
        print("Unable to locate valid description.")
        description = full_sentence.strip()

    return description

def get_caption_from_MD(image_in):
    """Return a fictional-style character description from the moondream1 Space.

    Args:
        image_in: File path of the image to describe.

    Returns:
        Whatever the Space's "/answer_question" endpoint returns for the
        fixed question; the value is also printed for logging.
    """
    question = "Describe character like if it was fictional"
    moondream_client = Client("https://vikhyatk-moondream1.hf.space/")
    answer = moondream_client.predict(
        image_in,  # filepath in 'image' Image component
        question,  # str in 'Question' Textbox component
        api_name="/answer_question",
    )
    print(answer)
    return answer


import re
import torch
from transformers import pipeline

# Shared text-generation pipeline, built once when the module is imported and
# reused by get_card_idea for every request.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

@spaces.GPU(enable_queue=True)
def get_card_idea(user_prompt):
    """Generate one magic-card idea from an image description.

    Builds a Zephyr-style chat prompt (system instructions with two worked
    examples, then the user's image description) and runs it through the
    module-level ``pipe`` with sampling enabled.

    Args:
        user_prompt: Image description to turn into a card.

    Returns:
        The raw value returned by ``pipe``. The caller reads
        ``outputs[0]["generated_text"]``, which still contains the prompt
        and therefore has to be cleaned up by the caller.
    """
    # No placeholders are interpolated here, so this is a plain string.
    agent_maker_sys = '''
Your job is to generate new magic card from an image description given by user. 
You will only provide one card idea.

Example 1:
"The image represents the famous painting "The Mona Lisa" by the Italian artist Leonardo da Vinci. The painting is a portrait of a woman with a distinctive smile, and it is known for its realistic style and the use of the sfumato technique, which creates a soft, smoky effect around the edges of the painting. The painting is displayed in a museum, and it is considered one of the most iconic and recognizable works of art in the world."

Bot Response:
"Mona Lisa's Enigma [2][W][U][B]
Enchantment
At the beginning of your upkeep, add one mana of any color to your mana pool for each color among permanents you control.
[W][U][B]: Target player puts the top X cards of their library into their graveyard, where X is the number of colors among permanents you control.
Her alluring smile masks unfathomable depths.
Mythic"

Example 2:
"The image features a fluffy, white and gray cat sitting on a couch. The cat has a surprised expression on its face, as if it has just heard or seen something unexpected. The cat's position on the couch and its attentive gaze towards the camera give the impression that it is a well-known or famous cat, perhaps a popular pet or a subject in a movie or TV show. However, without more context or information, it is not possible to definitively identify the cat as something famous."

Bot Response:
"Feline Dominator
[2][G][W]
Creature - Cat
Whenever Feline Dominator attacks, it gets +1/+1 until end of turn for each other attacking Cat you control.
Whenever Feline Dominator deals combat damage, you gain that much life.
The true ruler of the house, demanding tribute from all who enter its domain.
2/4
Uncommon"

Only provide one card example according to image description.
'''

    instruction = f"""
<|system|>
{agent_maker_sys}</s>
<|user|>
"""

    # End the prompt with the assistant tag so generation starts in the
    # assistant turn. The caller strips everything from "<|system|>" up to
    # "<|assistant|>", so the tag must be present for that cleanup to work
    # reliably instead of depending on the model emitting it on its own.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    return outputs


def infer(image_in, cap_type):
    """Turn an image into a suggested magic card.

    Captions the image with the backend selected by ``cap_type``, asks the
    language model for a card based on that caption, and removes the echoed
    prompt from the generated text.

    Args:
        image_in: File path of the reference image.
        cap_type: "Fictional" (moondream caption) or "Literal" (Kosmos-2
            caption).

    Returns:
        The generated card text with the prompt section and leading
        newlines removed.

    Raises:
        gr.Error: If no image was provided or ``cap_type`` is not one of the
            two supported values.
    """
    if image_in is None:
        raise gr.Error("Please provide an image first.")

    gr.Info("Getting image description...")
    if cap_type == "Fictional":
        user_prompt = get_caption_from_MD(image_in)
    elif cap_type == "Literal":
        user_prompt = get_caption_from_kosmos(image_in)
    else:
        # Without this branch `user_prompt` stayed unbound and the call
        # below failed with an opaque UnboundLocalError.
        raise gr.Error(f"Unknown caption type: {cap_type!r}")

    gr.Info("Building a new card according to the image caption ...")
    outputs = get_card_idea(user_prompt)

    # The generated text includes the prompt; drop everything from the
    # system tag up to and including the assistant tag.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    print(f"SUGGESTED CARD: {cleaned_text}")

    return cleaned_text.lstrip("\n")

# Page metadata. `title` used to end with a trailing comma, which made it a
# one-element tuple rather than a string. `description` is interpolated into
# the header HTML of the layout.
title = "Magic Card Generator"
description = ""

# Custom stylesheet passed to gr.Blocks: centers the main column and caps its
# width, and overrides the example gallery image width.
# NOTE(review): `div#component-14` looks like an auto-assigned Gradio element
# id, so it presumably depends on component creation order — confirm before
# adding or reordering components.
css = """
#col-container{
    margin: 0 auto;
    max-width: 780px;
    text-align: left;
}
/* fix examples gallery width on mobile */
div#component-14 > .gallery > .gallery-item > .container > img {
    width: auto!important;
}
"""

# Build the UI: a header, a left column with the inputs (image, caption type,
# submit button, examples) and a right column with the generated card text.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Static header; only the description is interpolated.
        gr.HTML(f"""
        <h2 style="text-align: center;">Magic Card Generator</h2>
        <p style="text-align: center;">{description}</p>
        """)
        
        with gr.Row():
            with gr.Column():
                # The image is handed to `infer` as a file path.
                image_in = gr.Image(
                    label = "Image reference",
                    type = "filepath",
                    elem_id = "image-in"
                )
                # Selects which captioning backend `infer` calls.
                cap_type = gr.Radio(
                    label = "Caption type",
                    choices = [
                        "Literal",
                        "Fictional"
                    ],
                    value = "Fictional"
                )
                submit_btn = gr.Button("Make a card from my pic !")
                
                # Each example row supplies a single value (the image path).
                gr.Examples(
                    examples = [
                        ["examples/monalisa.png"],
                        ["examples/violonist.png"],
                        ["examples/frog.jpeg"],
                        ["examples/samourai.png"]
                    ],
                    fn = infer,
                    inputs = [image_in, cap_type]
                )
            with gr.Column():
                result = gr.Textbox(
                    label = "Suggested Card",
                    lines = 6,
                    max_lines = 30,
                    elem_id = "suggested-card"
                )
        
        

    # Clicking the button runs `infer` on the image and caption type and
    # writes the returned text into the result box.
    submit_btn.click(
        fn = infer,
        inputs = [
            image_in,
            cap_type
        ],
        outputs =[
            result
        ]
    )

# Enable request queuing and start the app with show_api=False and
# show_error=True.
demo.queue().launch(show_api=False, show_error=True)