Deltan2002 committed on
Commit 33903b2
1 Parent(s): a347a3b

Upload app.py

Files changed (1)
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
+ from transformers import pipeline
+ from dotenv import find_dotenv, load_dotenv
+ from langchain import PromptTemplate, LLMChain, HuggingFaceHub
+ import streamlit as st
+ import requests
+ import os
+
+
+ load_dotenv(find_dotenv())  # load environment variables from a local .env file
+ HUGGINGFACE_API = os.getenv("HUGGINGFACE_API")
+
+
+ def image2text(url):
+     image_to_text = pipeline('image-to-text', model='Salesforce/blip-image-captioning-large')  # BLIP captioning model
+     text = image_to_text(url)[0]['generated_text']
+     print(text)
+     return text
+
+
+
+ def generate_story(scenario, length):
+     template = """
+     You are a storyteller, generate a short story in {length} words\n
+     CONTEXT:{scenario}\n
+     STORY:
+     """
+
+     prompt = PromptTemplate(template=template, input_variables=["scenario", "length"])
+     llm = LLMChain(llm=HuggingFaceHub(huggingfacehub_api_token=HUGGINGFACE_API, repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1"), prompt=prompt, verbose=True)  # Mixtral served from the Hugging Face Hub
+     story = llm.predict(scenario=scenario, length=length)
+     print(story)
+     return story
+
+
+
+ # def text2speech(message):
+ #     API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
+ #     headers = {"Authorization": f"Bearer {HUGGINGFACE_API}"}
+ #     payloads = {
+ #         "inputs": message
+ #     }
+ #     response = requests.post(API_URL, headers=headers, json=payloads)
+ #     with open('audio.wav', 'wb') as file:
+ #         file.write(response.content)
+
+
+
+ def main():
+     st.set_page_config(page_title="Image Storyteller")
+
+     st.header("Image to Audio")
+     uploaded_file = st.file_uploader("Choose an Image", type="jpg")
+
+     length = st.number_input("Length")
+     scenario = ""
+     successful_processing = False
+
+     if uploaded_file is not None:
+         print(uploaded_file)
+         bytes_data = uploaded_file.getvalue()
+         with open(uploaded_file.name, "wb") as file:  # save the upload locally so the captioning pipeline can read it
+             file.write(bytes_data)
+
+         st.image(uploaded_file.name, caption="Uploaded Image", use_column_width=True)
+
+         try:
+             scenario = image2text(uploaded_file.name)
+             successful_processing = True
+         except Exception as e:
+             st.error(f"Error processing the image: {e}")
+
+         if successful_processing:
+             story = generate_story(scenario, int(length))  # number_input returns a float, so cast to a whole word count
+             # text2speech(story)
+
+             with st.expander("scenario"):
+                 st.write(scenario)
+             with st.expander("generated story"):
+                 st.write(story)
+             # st.audio('audio.wav')
+
+ if __name__ == '__main__':
+     main()
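
For local testing, the two model steps in this commit can also be exercised outside Streamlit; a minimal sketch, assuming app.py is on the import path, HUGGINGFACE_API is set in the environment, and example.jpg is a placeholder image path:

    # hypothetical smoke test, not part of this commit
    from app import image2text, generate_story

    caption = image2text("example.jpg")   # BLIP caption for the local image
    story = generate_story(caption, 100)  # ~100-word story from Mixtral via the Hub
    print(story)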