Pranav4datasc committed on
Commit
bd5c569
•
1 Parent(s): b645af6

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +56 -0
  2. beachboat.jpg +0 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import find_dotenv, load_dotenv
2
+ from transformers import pipeline
3
+ import requests
4
+ import os
5
+ import streamlit as st
6
+
7
# Populate the process environment from the .env file located by
# find_dotenv(), then read the Hugging Face token used by text2speech().
# api_token is None when the variable is not set.
load_dotenv(find_dotenv())
api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
9
+ #img2text
10
@st.cache_resource
def _load_image_captioner():
    """Build the BLIP image-captioning pipeline a single time.

    Constructing the pipeline loads the model weights, which is by far the
    most expensive step. Streamlit re-executes the script on every user
    interaction, so the pipeline is kept in Streamlit's resource cache
    instead of being rebuilt for each caption request.
    """
    return pipeline(
        "image-to-text",
        model='Salesforce/blip-image-captioning-large',
    )


def img2text(url):
    """Return a generated caption for an image.

    Args:
        url: Image reference passed unchanged to the ``image-to-text``
            pipeline. The app passes the path of a local image file.

    Returns:
        The ``generated_text`` value of the first pipeline result.
    """
    image_to_text = _load_image_captioner()
    return image_to_text(url)[0]["generated_text"]
16
+ #
17
+
18
+ #text2speech
19
def text2speech(message, output_path='audio.flac', timeout=60):
    """Synthesize speech for ``message`` and save it as an audio file.

    The text is sent to the Hugging Face Inference API and the response body
    is written to ``output_path``.

    Args:
        message: Text to convert to speech. Must contain non-whitespace
            characters.
        output_path: File the audio bytes are written to. Defaults to
            ``'audio.flac'``, the file the app plays back.
        timeout: Seconds to wait for the API before giving up, so a stalled
            request cannot hang the app indefinitely.

    Raises:
        ValueError: If ``message`` is empty or only whitespace.
        requests.RequestException: If the request fails or the API answers
            with an HTTP error status.
    """
    if not message or not message.strip():
        raise ValueError("message must be a non-empty string")

    # Alternative model: https://api-inference.huggingface.co/models/microsoft/speecht5_tts
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"

    # Only send the Authorization header when a token is configured; a
    # literal "Bearer None" is never a valid credential.
    headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}
    payloads = {"inputs": message}

    response = requests.post(
        API_URL, headers=headers, json=payloads, timeout=timeout
    )
    # Fail loudly on an error response instead of saving the error body as
    # if it were audio data.
    response.raise_for_status()

    with open(output_path, 'wb') as file:
        file.write(response.content)
29
+
30
+
31
def main():
    """Render the Streamlit page: caption an uploaded image and read it aloud.

    Shows a sample picture and a JPEG uploader. Once a file is uploaded it is
    displayed, captioned with ``img2text``, converted to speech with
    ``text2speech`` and the resulting ``audio.flac`` is offered for playback.
    """
    # Local import: tempfile is only needed here and is not among the
    # module-level imports.
    import tempfile

    st.title("Image to text to audio by 🤖")
    st.header("Turn image to audio podcast !!!")
    st.caption("Sample picture...")
    st.image("beachboat.jpg")
    # The sample picture is only displayed; it is not captioned, because the
    # caption was never used and cost a model inference on every script run.
    uploaded_file = st.file_uploader(
        "Choose your image or simply drag sample image given above",
        type=["jpg", "jpeg"],
    )

    if uploaded_file is None:
        return

    st.image(uploaded_file, caption='Uploaded image.',
             use_column_width=True)

    # Write the upload to a private temporary file rather than to the working
    # directory under its client-supplied name, so an upload can never
    # overwrite application files such as the sample image.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        image_path = tmp.name
    try:
        scenario = img2text(image_path)
    finally:
        os.remove(image_path)

    with st.expander("Scenario"):
        st.write(scenario)

    try:
        text2speech(scenario)
    except (requests.RequestException, ValueError) as err:
        # Report the failure instead of playing a stale or invalid file.
        st.error(f"Could not generate audio: {err}")
        return

    st.audio("audio.flac")
53
+
54
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
56
+
beachboat.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ # os is part of the Python standard library and must not be listed as a pip requirement
3
+ requests
4
+ transformers
5
+ python-dotenv