AidenYan committed on
Commit
c44bffb
·
verified ·
1 Parent(s): 9479038

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -8,7 +8,7 @@ import io
8
 
9
  # Function to perform mean pooling on the model outputs
10
  def mean_pooling(model_output, attention_mask):
11
- token_embeddings = model_output['last_hidden_state'] # Get the embeddings from the model output
12
  input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
13
  sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
14
  sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
@@ -26,7 +26,7 @@ model_text = AutoModel.from_pretrained('jim33282007/5240_grp27_proj')
26
  model_gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
27
  tokenizer_gpt2 = AutoTokenizer.from_pretrained('gpt2')
28
 
29
- st.title('Image Captioning, Text Embedding, and Text Input Application')
30
 
31
  # Function to load images from URL
32
  def load_image_from_url(url):
@@ -46,7 +46,6 @@ typed_text = ""
46
  if input_type == "Upload Image":
47
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
48
  if uploaded_file is not None:
49
- # Convert bytes to a PIL image
50
  image = Image.open(io.BytesIO(uploaded_file.getvalue()))
51
  st.image(image, caption='Uploaded Image', use_column_width=True)
52
  elif input_type == "Image URL":
@@ -58,26 +57,28 @@ elif input_type == "Image URL":
58
  elif input_type == "Type Sentence":
59
  typed_text = st.text_area("Type your sentence here:")
60
 
61
- # Generate caption button
62
- if st.button('Generate Caption and Process'):
63
  if image or typed_text:
64
  with st.spinner("Processing..."):
65
  generated_text_p1 = ""
66
  if input_type == "Upload Image" and uploaded_file is not None:
67
- # Use the PIL image directly with the pipeline
68
  result = image_to_text(image)
69
  generated_text_p1 = result[0]['generated_text']
70
  elif input_type == "Image URL" and image_url:
71
  result = image_to_text(image_url)
72
  generated_text_p1 = result[0]['generated_text']
73
  elif input_type == "Type Sentence" and typed_text:
74
- generated_text_p1 = typed_text # Use the typed text directly
75
 
76
  if generated_text_p1:
77
  st.success(f'Processed Text: {generated_text_p1}')
78
 
79
- # Further processing of the text can be done here
80
- # This could include generating embeddings, further text, etc., similar to what is done for the image captions
 
 
 
 
81
  else:
82
  st.error("Please upload an image, enter an image URL, or type a sentence first.")
83
-
 
8
 
9
  # Function to perform mean pooling on the model outputs
10
  def mean_pooling(model_output, attention_mask):
11
+ token_embeddings = model_output['last_hidden_state']
12
  input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
13
  sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
14
  sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
 
26
  model_gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
27
  tokenizer_gpt2 = AutoTokenizer.from_pretrained('gpt2')
28
 
29
+ st.title('Image Captioning, Text Embedding, Text Generation, and Input Application')
30
 
31
  # Function to load images from URL
32
  def load_image_from_url(url):
 
46
  if input_type == "Upload Image":
47
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
48
  if uploaded_file is not None:
 
49
  image = Image.open(io.BytesIO(uploaded_file.getvalue()))
50
  st.image(image, caption='Uploaded Image', use_column_width=True)
51
  elif input_type == "Image URL":
 
57
  elif input_type == "Type Sentence":
58
  typed_text = st.text_area("Type your sentence here:")
59
 
60
+ # Generate caption and process text button
61
+ if st.button('Generate Caption and Process Text'):
62
  if image or typed_text:
63
  with st.spinner("Processing..."):
64
  generated_text_p1 = ""
65
  if input_type == "Upload Image" and uploaded_file is not None:
 
66
  result = image_to_text(image)
67
  generated_text_p1 = result[0]['generated_text']
68
  elif input_type == "Image URL" and image_url:
69
  result = image_to_text(image_url)
70
  generated_text_p1 = result[0]['generated_text']
71
  elif input_type == "Type Sentence" and typed_text:
72
+ generated_text_p1 = typed_text
73
 
74
  if generated_text_p1:
75
  st.success(f'Processed Text: {generated_text_p1}')
76
 
77
+ # Generate additional text using GPT-2 based on the processed text
78
+ input_ids = tokenizer_gpt2.encode(generated_text_p1, return_tensors='pt')
79
+ generated_outputs = model_gpt2.generate(input_ids, max_length=100, num_return_sequences=1)
80
+ generated_text = tokenizer_gpt2.decode(generated_outputs[0], skip_special_tokens=True)
81
+
82
+ st.text_area("Generated Text:", generated_text, height=200)
83
  else:
84
  st.error("Please upload an image, enter an image URL, or type a sentence first.")