# Streamlit app: image captioning (ViT-GPT2), text embedding, and GPT-2 text generation.
# (Removed non-Python scrape residue — repo header, commit hashes, column rulers —
# that made the file unparseable.)
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModel, AutoModelForCausalLM
import torch
from PIL import Image
import requests
from io import BytesIO
import io
def mean_pooling(model_output, attention_mask):
    """Mask-aware mean pooling over token embeddings.

    Averages the `last_hidden_state` vectors across the sequence dimension,
    counting only positions where the attention mask is 1, so padding tokens
    do not dilute the sentence embedding.

    Args:
        model_output: mapping with key 'last_hidden_state' holding a tensor
            of shape (batch, seq_len, hidden).
        attention_mask: tensor of shape (batch, seq_len) with 1 for real
            tokens and 0 for padding.

    Returns:
        Tensor of shape (batch, hidden) — the masked mean embedding.
    """
    embeddings = model_output['last_hidden_state']
    # Broadcast the mask over the hidden dimension so it can weight each value.
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    summed = (embeddings * mask).sum(1)
    # Clamp avoids division by zero for all-padding rows.
    counts = mask.sum(1).clamp(min=1e-9)
    return summed / counts
# Initialize the pipeline for image-to-text
# ViT encoder + GPT-2 decoder captioning model; accepts PIL images or URLs.
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
# Initialize tokenizer and model for text processing
# NOTE(review): tokenizer_text/model_text are loaded but never used in the
# visible code — presumably intended for mean_pooling embeddings; confirm
# before removing.
tokenizer_text = AutoTokenizer.from_pretrained('jim33282007/5240_grp27_proj')
model_text = AutoModel.from_pretrained('jim33282007/5240_grp27_proj')
# Initialize a text generation model
# gpt2-xl (~1.5B params) continues the caption / typed sentence below.
model_gpt2 = AutoModelForCausalLM.from_pretrained('gpt2-xl')
tokenizer_gpt2 = AutoTokenizer.from_pretrained('gpt2-xl')
st.title('Image Captioning, Text Embedding, Text Generation, and Input Application')
# Function to load images from URL
def load_image_from_url(url):
    """Download an image from `url` and return it as a PIL Image.

    Shows a Streamlit error and returns None on any failure (network error,
    HTTP error status, or non-image content).

    Args:
        url: HTTP(S) URL pointing at an image resource.

    Returns:
        PIL.Image.Image on success, otherwise None.
    """
    try:
        # Timeout keeps the Streamlit script from hanging forever on a
        # stalled connection (requests has no default timeout).
        response = requests.get(url, timeout=10)
        # Fail fast on 4xx/5xx instead of handing an HTML error page to
        # Image.open, which would raise a confusing UnidentifiedImageError.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        return img
    except Exception as e:
        st.error(f"Error loading image from URL: {e}")
        return None
# User option to select input type: Upload, URL, or Type Sentence
input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Type Sentence"))

# Shared state consumed by the processing step below.
image = None
typed_text = ""

if input_type == "Upload Image":
    # Local file upload: read the raw bytes into PIL and echo the preview.
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
        st.image(image, caption='Uploaded Image', use_column_width=True)
elif input_type == "Image URL":
    # Remote image: fetch via helper; only preview when the download worked.
    image_url = st.text_input("Enter the image URL here:", "")
    if image_url:
        image = load_image_from_url(image_url)
        if image:
            st.image(image, caption='Image from URL', use_column_width=True)
elif input_type == "Type Sentence":
    # Plain text path: skip captioning entirely and use the sentence as-is.
    typed_text = st.text_area("Type your sentence here:")
# Generate caption and process text button
if st.button('Generate Caption and Process Text'):
    if image or typed_text:
        with st.spinner("Processing..."):
            generated_text_p1 = ""
            if input_type == "Upload Image" and uploaded_file is not None:
                result = image_to_text(image)
                generated_text_p1 = result[0]['generated_text']
            elif input_type == "Image URL" and image_url:
                # Caption the image we already downloaded instead of handing
                # the raw URL back to the pipeline — avoids a second download
                # and respects a failed/validated load. Fall back to the URL
                # only if the PIL image is unavailable.
                result = image_to_text(image if image is not None else image_url)
                generated_text_p1 = result[0]['generated_text']
            elif input_type == "Type Sentence" and typed_text:
                generated_text_p1 = typed_text
            if generated_text_p1:
                st.success(f'Processed Text: {generated_text_p1}')
                # Generate additional text using GPT-2 based on the processed text
                input_ids = tokenizer_gpt2.encode(generated_text_p1, return_tensors='pt')
                generated_outputs = model_gpt2.generate(
                    input_ids,
                    max_length=100,
                    num_return_sequences=1,
                    # GPT-2 has no pad token; reuse EOS to avoid the
                    # open-ended-generation warning/undefined padding.
                    pad_token_id=tokenizer_gpt2.eos_token_id,
                )
                generated_text = tokenizer_gpt2.decode(generated_outputs[0], skip_special_tokens=True)
                st.text_area("Generated Text:", generated_text, height=200)
    else:
        st.error("Please upload an image, enter an image URL, or type a sentence first.")