Spaces:

zforkash
/

Image-Transformer-Charater-Text

Sleeping

App Files Community

zforkash committed on Nov 7, 2024

Commit

baa9321

verified ·

1 Parent(s): fc17b4b

Update1

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -4,62 +4,61 @@ from huggingface_hub import InferenceClient
 from PIL import Image
 import os
-def setup_session():
-    if 'app_ready' not in st.session_state:
-        print("Powering up the Dragon Radar...")
-        st.session_state['app_ready'] = True
-        st.session_state['hf_token'] = os.getenv("HUGGINGFACE_TOKEN")
-        st.session_state['client'] = InferenceClient(api_key=st.session_state['hf_token'])
 def main():
-    setup_session()
-    st.header("Anime & Friends Image Commentary")
-    st.write("Let your favorite characters react to any image!")
-    character = st.selectbox(
-        "Select your commentator",
-        ["goku", "elmo", "kirby", "pikachu"]
-    )
-    uploaded_img = st.file_uploader("Share your image!")
     if uploaded_img is not None:
         image = Image.open(uploaded_img)
         st.image(image)
-        caption_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
-        base_caption = caption_model(image)[0]['generated_text']
-        character_reactions = {
-            "goku": f"Describe this image like you're Goku from Dragon Ball Z, mentioning power levels: {base_caption}",
-            "elmo": f"Describe this image like you're Elmo from Sesame Street, speaking in third person: {base_caption}",
-            "kirby": f"Describe this image like you're Kirby, being cute and mentioning food: {base_caption}",
-            "pikachu": f"Describe this image like you're Pikachu, using 'pika' frequently: {base_caption}"
         }
         messages = [
-            {
-                "role": "user",
-                "content": character_reactions[character]
-            }
         ]
-        # Generate character response using Llama
-        response_stream = st.session_state['client'].chat.completions.create(
-            model="meta-llama/Llama-3.2-3B-Instruct",
-            messages=messages,
             max_tokens=500,
             stream=True
         )
-        character_response = ''
-        for chunk in response_stream:
-            character_response += chunk.choices[0].delta.content
-        st.write(character_response)
 if __name__ == '__main__':
     main()

 from PIL import Image
 import os
+def initialize():
+    if 'initialized' not in st.session_state:
+        print("Initializing...")
+        st.session_state['initialized'] = True
+        st.session_state['api_key'] = os.getenv("HUGGINGFACE_TOKEN")
+        st.session_state['client'] = InferenceClient(api_key=st.session_state['api_key'])
 def main():
+    initialize()
+    st.header("Character Captions")
+    st.write("Have a character caption any image you upload!")
+    character = st.selectbox("Choose a character", ["artist", "elmo", "unintelligible", "goku"])
+    uploaded_img = st.file_uploader("Upload an image here")
     if uploaded_img is not None:
         image = Image.open(uploaded_img)
         st.image(image)
+        image_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+        response = image_captioner(image)
+        caption = response[0]['generated_text']
+        character_prompts = {
+            "artist": f"Describe this caption like you're a artist: {caption}.",
+            "elmo": f"Describe this caption like you're elmo: {caption}.",
+            "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
+            "goku": f"Describe this caption like you're goku: {caption}."
         }
+        prompt = character_prompts[character]
         messages = [
+            { "role": "user", "content": prompt }
         ]
+        stream = st.session_state['client'].chat.completions.create(
+            model="meta-llama/Llama-3.2-3B-Instruct",
+            messages=messages,
             max_tokens=500,
             stream=True
         )
+        response = ''
+        for chunk in stream:
+            response += chunk.choices[0].delta.content
+        st.write(response)
 if __name__ == '__main__':
     main()