"""Streamlit app: caption an uploaded image, then expand the caption into a story."""

import logging
import os
import tempfile
from pathlib import Path

from dotenv import find_dotenv, load_dotenv
from langchain import PromptTemplate, LLMChain, HuggingFaceHub
import requests  # only needed by the disabled text2speech helper below
import streamlit as st
from transformers import pipeline

load_dotenv(find_dotenv())
huggingface_api_key = os.getenv("HUGGINGFACE_API")

logger = logging.getLogger(__name__)

CAPTION_MODEL = "Salesforce/blip-image-captioning-large"
STORY_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# Word count used when the "Length" input is left at 0.
DEFAULT_STORY_LENGTH = 10

STORY_TEMPLATE = (
    " You are story teller, generate a short story in {length} words\n"
    " CONTEXT:{scenario}\n"
    " STORY: "
)


@st.cache_resource
def _load_captioner():
    """Build the image-to-text pipeline once and reuse it across script reruns."""
    return pipeline("image-to-text", model=CAPTION_MODEL)


def image2text(url):
    """Return the caption generated for an image.

    Args:
        url: Image reference handed unchanged to the image-to-text pipeline
            (``main`` passes a local file path).

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    text = _load_captioner()(url)[0]["generated_text"]
    logger.info("Image caption: %s", text)
    return text


def generate_story(scenario, length):
    """Generate a short story from a scenario using the hosted LLM.

    Args:
        scenario: Context text inserted into the prompt.
        length: Requested story length in words, inserted into the prompt.

    Returns:
        The text predicted by the LLM chain.
    """
    prompt = PromptTemplate(
        template=STORY_TEMPLATE,
        input_variables=["scenario", "length"],
    )
    chain = LLMChain(
        llm=HuggingFaceHub(
            huggingfacehub_api_token=huggingface_api_key,
            repo_id=STORY_MODEL,
        ),
        prompt=prompt,
        verbose=True,
    )
    story = chain.predict(scenario=scenario, length=length)
    logger.info("Generated story: %s", story)
    return story


# TODO: text-to-speech is disabled; kept commented out for reference.
# def text2speech(message):
#     API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
#     headers = {"Authorization": f"Bearer {huggingface_api_key}"}
#     payloads = {
#         "inputs": message
#     }
#     response = requests.post(API_URL, headers=headers, json=payloads)
#     with open('audio.wav', 'wb') as file:
#         file.write(response.content)


def _caption_upload(image_bytes):
    """Caption raw image bytes via a throwaway file.

    The bytes are written under a fixed name inside a temporary directory, so
    the client-supplied upload name never reaches the filesystem and nothing is
    left behind once captioning finishes.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = Path(tmp_dir) / "upload.jpg"
        image_path.write_bytes(image_bytes)
        return image2text(str(image_path))


def main():
    """Render the page: upload an image, caption it, and show the generated story."""
    st.set_page_config(page_title="Image Storyteller")
    st.header("Image to Story")

    # Integer bounds make the widget return an int, so the prompt reads
    # "50 words" rather than "50.0 words"; 0 (the initial value) means default.
    length = st.number_input("Length", min_value=0, step=1) or DEFAULT_STORY_LENGTH

    uploaded_file = st.file_uploader("Choose an Image", type="jpg")
    if uploaded_file is None:
        return

    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Top-level UI boundary: surface any failure to the user instead of crashing.
    try:
        scenario = _caption_upload(uploaded_file.getvalue())
    except Exception as exc:
        logger.exception("Image captioning failed")
        st.error(f"Error processing the image: {exc}")
        return

    try:
        story = generate_story(scenario, length)
    except Exception as exc:
        logger.exception("Story generation failed")
        st.error(f"Error generating the story: {exc}")
        return

    # text2speech(story)

    with st.expander("Scenario"):
        st.write(scenario)
    with st.expander("Generated Story"):
        st.write(story)

    # st.audio('audio.wav')


if __name__ == '__main__':
    main()