# app.py — "Text to Image Generation with CLIP" Streamlit demo
# (originally published in the Hugging Face Space text2speech by Tharunika1601)
import streamlit as st
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image

st.title("Text to Image Generation with CLIP")


@st.cache_resource
def _load_clip():
    """Load the CLIP processor and model once per server process.

    `st.cache_resource` keeps the (large) model out of the per-rerun path:
    Streamlit re-executes the whole script on every interaction, and
    reloading the weights each time would make the app unusably slow.
    """
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
    model.eval()
    return processor, model


clip_processor, clip_model = _load_clip()

text = st.text_area("Enter a description:")
if st.button("Generate Image") and text:
    # NOTE: CLIP is an *encoder*, not a generative model — it cannot
    # synthesize an image from text. (The original code called the
    # non-existent `clip_model.encode_image` on text inputs, which raised
    # AttributeError.) What we CAN do is encode the text into CLIP's joint
    # embedding space and visualize that embedding. For real text-to-image
    # generation, use a diffusion model (e.g. Stable Diffusion via the
    # `diffusers` library), which itself uses CLIP as its text encoder.
    inputs = clip_processor(text=text, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only — no gradients needed
        # (1, 512) text embedding for clip-vit-base-patch16
        text_features = clip_model.get_text_features(**inputs)

    # Normalize the 512-dim embedding to [0, 1] and lay it out as a
    # 16x32 grayscale grid (16 * 32 == 512) so it can be shown as an image.
    vec = text_features.squeeze(0)
    vec = (vec - vec.min()) / (vec.max() - vec.min() + 1e-8)
    grid = (vec.reshape(16, 32).cpu().numpy() * 255).astype("uint8")

    # Upscale with NEAREST so each embedding value stays a crisp cell.
    image = Image.fromarray(grid, mode="L").resize((320, 160), Image.NEAREST)

    st.image(
        image,
        caption="CLIP text-embedding visualization (not a generated image)",
        use_column_width=True,
    )