AidenYan committed
Commit 9479038 · verified · 1 Parent(s): 3a0b9b9

Update app.py

Files changed (1):
  1. app.py +19 -29
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 from PIL import Image
 import requests
 from io import BytesIO
+import io
 
 # Function to perform mean pooling on the model outputs
 def mean_pooling(model_output, attention_mask):
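
Note: only the signature of mean_pooling appears in this hunk; its body is unchanged by the commit and not shown. For reference, a typical attention-mask-weighted mean-pooling implementation (an assumption here, not the file's actual body) looks like this:

import torch

# Assumed, typical mean-pooling body: average the token embeddings while
# ignoring padding positions, using the attention mask as weights.
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # last hidden states, shape (batch, seq_len, hidden)
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, dim=1) / torch.clamp(mask.sum(dim=1), min=1e-9)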
@@ -41,11 +42,12 @@ def load_image_from_url(url):
 input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Type Sentence"))
 
 image = None
-typed_text = None
+typed_text = ""
 if input_type == "Upload Image":
     uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
-        image = Image.open(uploaded_file)
+        # Convert bytes to a PIL image
+        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
         st.image(image, caption='Uploaded Image', use_column_width=True)
 elif input_type == "Image URL":
     image_url = st.text_input("Enter the image URL here:", "")
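
Note: the replacement wraps the upload in io.BytesIO because uploaded_file.getvalue() returns raw bytes, while Image.open expects a path or a file-like object. A minimal sketch of the same round trip outside Streamlit (the file path is illustrative only):

import io
from PIL import Image

# Stand-in for uploaded_file.getvalue(): raw image bytes from any source.
with open("example.jpg", "rb") as f:  # illustrative path
    raw_bytes = f.read()

image = Image.open(io.BytesIO(raw_bytes))  # BytesIO makes the bytes file-like
print(image.size, image.mode)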
@@ -58,36 +60,24 @@ elif input_type == "Type Sentence":
 
 # Generate caption button
 if st.button('Generate Caption and Process'):
-    generated_text_p1 = ""
-    if image:
-        with st.spinner("Generating caption..."):
+    if image or typed_text:
+        with st.spinner("Processing..."):
+            generated_text_p1 = ""
             if input_type == "Upload Image" and uploaded_file is not None:
-                result = image_to_text(uploaded_file.getvalue())
+                # Use the PIL image directly with the pipeline
+                result = image_to_text(image)
+                generated_text_p1 = result[0]['generated_text']
             elif input_type == "Image URL" and image_url:
                 result = image_to_text(image_url)
-            generated_text_p1 = result[0]['generated_text']
-            st.success(f'Generated Caption: {generated_text_p1}')
-    elif typed_text:
-        generated_text_p1 = typed_text
-        st.success(f'Entered Text: {generated_text_p1}')
-
-    if generated_text_p1:
-        # Tokenize the new sentence and compute its embedding
-        encoded_input = tokenizer_text(generated_text_p1, return_tensors="pt")
-        with torch.no_grad():
-            model_output = model_text(**encoded_input)
-
-        # Perform mean pooling on the embeddings
-        attention_mask = encoded_input['attention_mask']
-        embeddings = mean_pooling(model_output, attention_mask)
+                generated_text_p1 = result[0]['generated_text']
+            elif input_type == "Type Sentence" and typed_text:
+                generated_text_p1 = typed_text  # Use the typed text directly
+
+            if generated_text_p1:
+                st.success(f'Processed Text: {generated_text_p1}')
 
-        # For demonstration, use the embeddings for further processing or similarity comparisons here
-
-        # Optionally generate further text based on the caption or entered text
-        with st.spinner("Generating further text..."):
-            encoded_caption = tokenizer_gpt2(generated_text_p1, return_tensors="pt")
-            output = model_gpt2.generate(**encoded_caption, max_length=50, num_return_sequences=1)
-            generated_text_p2 = tokenizer_gpt2.decode(output[0], skip_special_tokens=True)
-            st.success(f'Further Generated Text: {generated_text_p2}')
+            # Further processing of the text can be done here
+            # This could include generating embeddings, further text, etc., similar to what is done for the image captions
     else:
         st.error("Please upload an image, enter an image URL, or type a sentence first.")
+
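
Note: the diff calls image_to_text(image) with a PIL image and image_to_text(image_url) with a URL string, but the pipeline itself is defined outside this hunk. Assuming it is a transformers image-to-text pipeline (the checkpoint below is only a placeholder), both call styles are accepted and both return a list of dicts with a generated_text key, matching result[0]['generated_text'] above:

from transformers import pipeline

# Assumed setup; app.py defines image_to_text outside this diff, and the
# checkpoint here is only an example.
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

result = image_to_text("https://example.com/cat.jpg")   # URL string
# result = image_to_text(pil_image)                     # or a PIL.Image
caption = result[0]["generated_text"]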
 
 
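Note: the new version replaces the old embedding and GPT-2 follow-up logic with a placeholder comment ("Further processing of the text can be done here"). If that behaviour is wanted back, the removed block can be reattached under the new if generated_text_p1: branch largely as it was; the sketch below assumes tokenizer_text, model_text, tokenizer_gpt2, model_gpt2 and the torch/streamlit imports are still set up earlier in app.py, as they were when the removed code ran.

# Sketch of restoring the removed "further processing", to sit inside the
# new `if generated_text_p1:` block (reuses objects defined earlier in app.py).
encoded_input = tokenizer_text(generated_text_p1, return_tensors="pt")
with torch.no_grad():
    model_output = model_text(**encoded_input)
embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
# embeddings can feed similarity comparisons, as the removed comment suggested

with st.spinner("Generating further text..."):
    encoded_caption = tokenizer_gpt2(generated_text_p1, return_tensors="pt")
    output = model_gpt2.generate(**encoded_caption, max_length=50, num_return_sequences=1)
    generated_text_p2 = tokenizer_gpt2.decode(output[0], skip_special_tokens=True)
    st.success(f'Further Generated Text: {generated_text_p2}')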