denizezkurt committed on
Commit
31c667d
·
1 Parent(s): a3d180d

add application

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import find_dotenv, load_dotenv
2
+ from transformers import pipeline
3
+ from langchain import PromptTemplate, LLMChain, OpenAI
4
+ from langchain.chat_models import ChatOpenAI
5
+ import requests
6
+ import os
7
+ import streamlit as st
8
+
9
+
10
# Locate a .env file with find_dotenv() and load it, then read the
# Hugging Face token that text2speech sends as its bearer credential.
_dotenv_path = find_dotenv()
load_dotenv(_dotenv_path)
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
12
+
13
+ # img2text
14
# st.cache_resource ships with Streamlit 1.18+; on older releases fall back to
# an identity decorator so the app still runs (just without caching).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once so reruns can reuse it."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


def img2text(url):
    """Return a generated caption for the image at *url*.

    Args:
        url: Image location handed straight to the transformers
            ``image-to-text`` pipeline (the caller in this file passes a
            local file path).

    Returns:
        The ``generated_text`` of the first caption produced by the model.
    """
    # Streamlit re-executes the script on every interaction, so the pipeline
    # is fetched from a cached loader instead of being rebuilt on each call.
    captioner = _load_captioner()
    text = captioner(url)[0]["generated_text"]

    print(text)
    return text
21
+
22
+
23
+ # llm storyteller
24
def generate_story(scenario):
    """Ask the language model for a short story based on *scenario*.

    Args:
        scenario: Text describing the scene (the image caption in this app).

    Returns:
        The story text returned by the chain. The prompt asks for at most
        100 words, but the limit is not enforced here.
    """
    template = """
    You are a storyteller;
    You can generate a short story based on a simple narrative, the story should be no more than 100 words;

    CONTEXT: {scenario}
    STORY:
    """

    prompt = PromptTemplate(template=template, input_variables=["scenario"])

    # gpt-3.5-turbo is a chat model, so it is driven through ChatOpenAI
    # (already imported by this file) rather than the completion-style
    # OpenAI wrapper.
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1)
    story_llm = LLMChain(llm=chat_model, prompt=prompt, verbose=True)

    story = story_llm.predict(scenario=scenario)

    print(story)
    return story
42
+
43
+
44
+
45
+ # text2speech
46
def text2speech(message):
    """Synthesize *message* to speech and save it as ``audio.flac``.

    Posts the text to the Hugging Face Inference API and writes the returned
    bytes to ``audio.flac`` in the current working directory.

    Args:
        message: Text to be spoken.

    Raises:
        requests.HTTPError: If the API answers with an error status. Nothing
            is written in that case, so an error payload never ends up in
            the audio file.
        requests.RequestException: On connection failures or timeouts.
    """
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
    payloads = {"inputs": message}

    # A timeout keeps the UI from hanging forever on a stalled request.
    response = requests.post(API_URL, headers=headers, json=payloads, timeout=120)
    # Fail loudly on 4xx/5xx instead of saving the error body as audio.
    response.raise_for_status()

    with open("audio.flac", "wb") as file:
        file.write(response.content)
56
+
57
+
58
+ #UI Streamlit
59
def main():
    """Render the Streamlit page: upload an image, caption it, narrate a story."""
    import tempfile  # local import: only this function needs it

    st.set_page_config(page_title="imageteller", page_icon="🤖")

    st.header("ImageTeller: Turn any image into a story")
    uploaded_file = st.file_uploader("choose an image", type="png")

    if uploaded_file is None:
        return

    st.image(uploaded_file, caption='Uploaded Image.',
             use_column_width=True)

    # Persist the upload to a private temporary file rather than to a path
    # built from the client-supplied file name, which could overwrite files
    # in the working directory and was never cleaned up.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        image_path = tmp.name
    try:
        scenario = img2text(image_path)
    finally:
        os.remove(image_path)

    story = generate_story(scenario)

    # Show the text results first so they stay visible even if speech
    # synthesis fails.
    with st.expander("scenario"):
        st.write(scenario)
    with st.expander("story"):
        st.write(story)

    try:
        text2speech(story)
    except requests.RequestException as exc:
        st.error(f"Could not generate audio: {exc}")
    else:
        st.audio("audio.flac")


if __name__ == '__main__':
    main()