KunalKharalkar committed on
Commit
9e375c7
1 Parent(s): c2f4eae

Upload 2 files

Files changed (2)
  1. app.py +72 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,72 @@
+ from dotenv import find_dotenv, load_dotenv
+ from transformers import pipeline
+ from langchain import PromptTemplate, LLMChain, OpenAI
+ import requests
+ import os
+ import streamlit as st
+
+ load_dotenv(find_dotenv())
+ HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+
+ # Module 1: image to text
+ def imgtotxt(url):
+     img_to_txt = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+     text = img_to_txt(url)[0]["generated_text"]
+     print(text)
+     return text
+
+
+ # Module 2: LLM story generation
+ def generate_story(scenario):
+     template = """
+     You are a storyteller;
+     You can generate a short story based on a simple narrative; the story should be no more than 50 words;
+
+     CONTEXT: {scenario}
+     STORY:
+     """
+
+     prompt = PromptTemplate(template=template, input_variables=["scenario"])
+
+     story_llm = LLMChain(llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1), prompt=prompt, verbose=True)
+     story = story_llm.predict(scenario=scenario)
+
+     print(story)
+     return story
+
+
+ # Module 3: text to speech
+ def texttospeech(message):
+     API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
+     headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
+     payloads = {
+         "inputs": message
+     }
+
+     response = requests.post(API_URL, headers=headers, json=payloads)
+     with open('audio.flac', 'wb') as file:  # save the returned audio next to the app
+         file.write(response.content)
+
+ def main():
+     st.set_page_config(page_title="Image to Audio Story", page_icon="🗣️")
+     st.header("Turn Image into Audio Story")
+     uploaded_file = st.file_uploader("Choose an Image...", type="jpg")
+
+     if uploaded_file is not None:
+         bytes_data = uploaded_file.getvalue()
+         with open(uploaded_file.name, "wb") as file:
+             file.write(bytes_data)
+         st.image(uploaded_file, caption="Uploaded Image.", use_column_width=True)
+         scenario = imgtotxt(uploaded_file.name)
+         story = generate_story(scenario)
+         texttospeech(story)
+
+         with st.expander("Scenario"):
+             st.write(scenario)
+         with st.expander("Story"):
+             st.write(story)
+
+         st.audio("audio.flac")
+
+ if __name__ == '__main__':
+     main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ python-dotenv
+ transformers
+ langchain
+ requests
+ streamlit
+ torch
+ openai
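
For reference, a minimal sketch of how the three uploaded modules chain together outside the Streamlit UI, assuming the requirements above are installed, app.py is importable from the working directory, a local sample.jpg exists, and both HUGGINGFACEHUB_API_TOKEN and OPENAI_API_KEY are present in the .env file (the OpenAI key is read implicitly by LangChain's OpenAI wrapper); sample.jpg and the script name are hypothetical:

# smoke_test.py (hypothetical helper, not part of this commit)
from app import imgtotxt, generate_story, texttospeech

scenario = imgtotxt("sample.jpg")   # BLIP caption for the local image
story = generate_story(scenario)    # ~50-word story from gpt-3.5-turbo
texttospeech(story)                 # writes audio.flac to the working directory
print(scenario)
print(story)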