AidenYan committed
Commit 9479038 · verified · 1 Parent(s): 3a0b9b9

Update app.py

Files changed (1):
  1. app.py +19 -29
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 from PIL import Image
 import requests
 from io import BytesIO
+import io
 
 # Function to perform mean pooling on the model outputs
 def mean_pooling(model_output, attention_mask):
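
Note: only the signature of mean_pooling appears in this hunk; its body is unchanged by the commit and not shown. For reference, a typical attention-mask-weighted mean-pooling implementation (an assumption here, not the file's actual body) looks like this:

import torch

# Assumed, typical mean-pooling body: average the token embeddings while
# ignoring padding positions, using the attention mask as weights.
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # last hidden states, shape (batch, seq_len, hidden)
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, dim=1) / torch.clamp(mask.sum(dim=1), min=1e-9)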
@@ -41,11 +42,12 @@ def load_image_from_url(url):
 input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Type Sentence"))
 
 image = None
-typed_text = None
+typed_text = ""
 if input_type == "Upload Image":
     uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
-        image = Image.open(uploaded_file)
+        # Convert bytes to a PIL image
+        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
         st.image(image, caption='Uploaded Image', use_column_width=True)
 elif input_type == "Image URL":
     image_url = st.text_input("Enter the image URL here:", "")
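
Note: the replacement wraps the upload in io.BytesIO because uploaded_file.getvalue() returns raw bytes, while Image.open expects a path or a file-like object. A minimal sketch of the same round trip outside Streamlit (the file path is illustrative only):

import io
from PIL import Image

# Stand-in for uploaded_file.getvalue(): raw image bytes from any source.
with open("example.jpg", "rb") as f:  # illustrative path
    raw_bytes = f.read()

image = Image.open(io.BytesIO(raw_bytes))  # BytesIO makes the bytes file-like
print(image.size, image.mode)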
@@ -58,36 +60,24 @@ elif input_type == "Type Sentence":
 
 # Generate caption button
 if st.button('Generate Caption and Process'):
-    generated_text_p1 = ""
-    if image:
-        with st.spinner("Generating caption..."):
+    if image or typed_text:
+        with st.spinner("Processing..."):
+            generated_text_p1 = ""
             if input_type == "Upload Image" and uploaded_file is not None:
-                result = image_to_text(uploaded_file.getvalue())
+                # Use the PIL image directly with the pipeline
+                result = image_to_text(image)
+                generated_text_p1 = result[0]['generated_text']
             elif input_type == "Image URL" and image_url:
                 result = image_to_text(image_url)
-            generated_text_p1 = result[0]['generated_text']
-            st.success(f'Generated Caption: {generated_text_p1}')
-    elif typed_text:
-        generated_text_p1 = typed_text
-        st.success(f'Entered Text: {generated_text_p1}')
-
-    if generated_text_p1:
-        # Tokenize the new sentence and compute its embedding
-        encoded_input = tokenizer_text(generated_text_p1, return_tensors="pt")
-        with torch.no_grad():
-            model_output = model_text(**encoded_input)
-
-        # Perform mean pooling on the embeddings
-        attention_mask = encoded_input['attention_mask']
-        embeddings = mean_pooling(model_output, attention_mask)
+                generated_text_p1 = result[0]['generated_text']
+            elif input_type == "Type Sentence" and typed_text:
+                generated_text_p1 = typed_text  # Use the typed text directly
+
+            if generated_text_p1:
+                st.success(f'Processed Text: {generated_text_p1}')
 
-        # For demonstration, use the embeddings for further processing or similarity comparisons here
-
-        # Optionally generate further text based on the caption or entered text
-        with st.spinner("Generating further text..."):
-            encoded_caption = tokenizer_gpt2(generated_text_p1, return_tensors="pt")
-            output = model_gpt2.generate(**encoded_caption, max_length=50, num_return_sequences=1)
-            generated_text_p2 = tokenizer_gpt2.decode(output[0], skip_special_tokens=True)
-            st.success(f'Further Generated Text: {generated_text_p2}')
+            # Further processing of the text can be done here
+            # This could include generating embeddings, further text, etc., similar to what is done for the image captions
     else:
         st.error("Please upload an image, enter an image URL, or type a sentence first.")
+
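
Note: the diff calls image_to_text(image) with a PIL image and image_to_text(image_url) with a URL string, but the pipeline itself is defined outside this hunk. Assuming it is a transformers image-to-text pipeline (the checkpoint below is only a placeholder), both call styles are accepted and both return a list of dicts with a generated_text key, matching result[0]['generated_text'] above:

from transformers import pipeline

# Assumed setup; app.py defines image_to_text outside this diff, and the
# checkpoint here is only an example.
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

result = image_to_text("https://example.com/cat.jpg")   # URL string
# result = image_to_text(pil_image)                     # or a PIL.Image
caption = result[0]["generated_text"]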
 
 
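Note: the new version replaces the old embedding and GPT-2 follow-up logic with a placeholder comment ("Further processing of the text can be done here"). If that behaviour is wanted back, the removed block can be reattached under the new if generated_text_p1: branch largely as it was; the sketch below assumes tokenizer_text, model_text, tokenizer_gpt2, model_gpt2 and the torch/streamlit imports are still set up earlier in app.py, as they were when the removed code ran.

# Sketch of restoring the removed "further processing", to sit inside the
# new `if generated_text_p1:` block (reuses objects defined earlier in app.py).
encoded_input = tokenizer_text(generated_text_p1, return_tensors="pt")
with torch.no_grad():
    model_output = model_text(**encoded_input)
embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
# embeddings can feed similarity comparisons, as the removed comment suggested

with st.spinner("Generating further text..."):
    encoded_caption = tokenizer_gpt2(generated_text_p1, return_tensors="pt")
    output = model_gpt2.generate(**encoded_caption, max_length=50, num_return_sequences=1)
    generated_text_p2 = tokenizer_gpt2.decode(output[0], skip_special_tokens=True)
    st.success(f'Further Generated Text: {generated_text_p2}')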