davila7 committed on
Commit
20c44d3
1 Parent(s): a52b6f6

CEO Microsoft

Browse files
Files changed (1) hide show
  1. app.py +74 -75
app.py CHANGED
@@ -33,89 +33,88 @@ array = []
33
 
34
  # Uncomment this section if you want to upload your own video
35
  # Sidebar
36
- with st.sidebar:
37
- user_secret = st.text_input(label = ":blue[OpenAI API key]",
38
- value="",
39
- placeholder = "Paste your openAI API key, sk-",
40
- type = "password")
41
- youtube_link = st.text_input(label = ":red[Youtube link]",
42
- value="https://youtu.be/bsFXgfbj8Bc",
43
- placeholder = "")
44
- if youtube_link and user_secret:
45
- youtube_video = YouTube(youtube_link)
46
- video_id = pytube.extract.video_id(youtube_link)
47
- streams = youtube_video.streams.filter(only_audio=True)
48
- stream = streams.first()
49
- if st.button("Start Analysis"):
50
- if os.path.exists("word_embeddings.csv"):
51
- os.remove("word_embeddings.csv")
52
 
53
- with st.spinner('Running process...'):
54
- # Get the video mp4
55
- mp4_video = stream.download(filename='youtube_video.mp4')
56
- audio_file = open(mp4_video, 'rb')
57
- st.write(youtube_video.title)
58
- st.video(youtube_link)
59
-
60
- # Whisper
61
- output = model.transcribe("youtube_video.mp4")
62
 
63
- # Transcription
64
- transcription = {
65
- "title": youtube_video.title.strip(),
66
- "transcription": output['text']
67
- }
68
- data_transcription.append(transcription)
69
- pd.DataFrame(data_transcription).to_csv('transcription.csv')
70
- segments = output['segments']
71
-
72
- # Pinecone index
73
- # check if index_name index already exists (only create index if not)
74
- # index_name = str(video_id)
75
- # # check if 'index_name' index already exists (only create index if not)
76
- # if 'index1' not in pinecone.list_indexes():
77
- # pinecone.create_index('index1', dimension=len(segments))
78
- # # connect to index
79
- # index = pinecone.Index('index1')
80
 
81
- #st.write(segments)
82
- #Embeddings
83
- for segment in segments:
84
- openai.api_key = user_secret
85
- response = openai.Embedding.create(
86
- input= segment["text"].strip(),
87
- model="text-embedding-ada-002"
88
- )
89
- embeddings = response['data'][0]['embedding']
90
- meta = {
91
- "text": segment["text"].strip(),
92
- "start": segment['start'],
93
- "end": segment['end'],
94
- "embedding": embeddings
95
- }
96
- data.append(meta)
97
- # upsert_response = index.upsert(
98
- # vectors=data,
99
- # namespace=video_id
100
- # )
101
- pd.DataFrame(data).to_csv('word_embeddings.csv')
102
- os.remove("youtube_video.mp4")
103
- st.success('Analysis completed')
104
 
105
  st.markdown('<h1>Youtube GPT 🤖<small> by <a href="https://codegpt.co">Code GPT</a></small></h1>', unsafe_allow_html=True)
106
- #st.write("Start a chat with this video of Microsoft CEO Satya Nadella's interview. You just need to add your OpenAI API Key and paste it in the 'Chat with the video' tab.")
107
- st.write('Demo con Midudev')
108
 
109
  DEFAULT_WIDTH = 80
110
  VIDEO_DATA = "https://youtu.be/bsFXgfbj8Bc"
111
 
112
- # width = 40
113
 
114
- # width = max(width, 0.01)
115
- # side = max((100 - width) / 2, 0.01)
116
 
117
- # _, container, _ = st.columns([side, 47, side])
118
- # container.video(data=VIDEO_DATA)
119
  tab1, tab2, tab3, tab4 = st.tabs(["Intro", "Transcription", "Embedding", "Chat with the Video"])
120
  with tab1:
121
  st.markdown("### How does it work?")
@@ -150,9 +149,9 @@ with tab3:
150
  df = pd.read_csv('word_embeddings.csv')
151
  st.write(df)
152
  with tab4:
153
- # user_secret = st.text_input(label = ":blue[OpenAI API key]",
154
- # placeholder = "Paste your openAI API key, sk-",
155
- # type = "password")
156
  st.write('To obtain an API Key you must create an OpenAI account at the following link: https://openai.com/api/')
157
  if 'generated' not in st.session_state:
158
  st.session_state['generated'] = []
 
33
 
34
  # Uncomment this section if you want to upload your own video
35
  # Sidebar
36
+ # with st.sidebar:
37
+ # user_secret = st.text_input(label = ":blue[OpenAI API key]",
38
+ # value="",
39
+ # placeholder = "Paste your openAI API key, sk-",
40
+ # type = "password")
41
+ # youtube_link = st.text_input(label = ":red[Youtube link]",
42
+ # value="https://youtu.be/bsFXgfbj8Bc",
43
+ # placeholder = "")
44
+ # if youtube_link and user_secret:
45
+ # youtube_video = YouTube(youtube_link)
46
+ # video_id = pytube.extract.video_id(youtube_link)
47
+ # streams = youtube_video.streams.filter(only_audio=True)
48
+ # stream = streams.first()
49
+ # if st.button("Start Analysis"):
50
+ # if os.path.exists("word_embeddings.csv"):
51
+ # os.remove("word_embeddings.csv")
52
 
53
+ # with st.spinner('Running process...'):
54
+ # # Get the video mp4
55
+ # mp4_video = stream.download(filename='youtube_video.mp4')
56
+ # audio_file = open(mp4_video, 'rb')
57
+ # st.write(youtube_video.title)
58
+ # st.video(youtube_link)
59
+
60
+ # # Whisper
61
+ # output = model.transcribe("youtube_video.mp4")
62
 
63
+ # # Transcription
64
+ # transcription = {
65
+ # "title": youtube_video.title.strip(),
66
+ # "transcription": output['text']
67
+ # }
68
+ # data_transcription.append(transcription)
69
+ # pd.DataFrame(data_transcription).to_csv('transcription.csv')
70
+ # segments = output['segments']
71
+
72
+ # # Pinecone index
73
+ # # check if index_name index already exists (only create index if not)
74
+ # # index_name = str(video_id)
75
+ # # # check if 'index_name' index already exists (only create index if not)
76
+ # # if 'index1' not in pinecone.list_indexes():
77
+ # # pinecone.create_index('index1', dimension=len(segments))
78
+ # # # connect to index
79
+ # # index = pinecone.Index('index1')
80
 
81
+ # #st.write(segments)
82
+ # #Embeddings
83
+ # for segment in segments:
84
+ # openai.api_key = user_secret
85
+ # response = openai.Embedding.create(
86
+ # input= segment["text"].strip(),
87
+ # model="text-embedding-ada-002"
88
+ # )
89
+ # embeddings = response['data'][0]['embedding']
90
+ # meta = {
91
+ # "text": segment["text"].strip(),
92
+ # "start": segment['start'],
93
+ # "end": segment['end'],
94
+ # "embedding": embeddings
95
+ # }
96
+ # data.append(meta)
97
+ # # upsert_response = index.upsert(
98
+ # # vectors=data,
99
+ # # namespace=video_id
100
+ # # )
101
+ # pd.DataFrame(data).to_csv('word_embeddings.csv')
102
+ # os.remove("youtube_video.mp4")
103
+ # st.success('Analysis completed')
104
 
105
  st.markdown('<h1>Youtube GPT 🤖<small> by <a href="https://codegpt.co">Code GPT</a></small></h1>', unsafe_allow_html=True)
106
+ st.write("Start a chat with this video of Microsoft CEO Satya Nadella's interview. You just need to add your OpenAI API Key and paste it in the 'Chat with the video' tab.")
 
107
 
108
  DEFAULT_WIDTH = 80
109
  VIDEO_DATA = "https://youtu.be/bsFXgfbj8Bc"
110
 
111
+ width = 40
112
 
113
+ width = max(width, 0.01)
114
+ side = max((100 - width) / 2, 0.01)
115
 
116
+ _, container, _ = st.columns([side, 47, side])
117
+ container.video(data=VIDEO_DATA)
118
  tab1, tab2, tab3, tab4 = st.tabs(["Intro", "Transcription", "Embedding", "Chat with the Video"])
119
  with tab1:
120
  st.markdown("### How does it work?")
 
149
  df = pd.read_csv('word_embeddings.csv')
150
  st.write(df)
151
  with tab4:
152
+ user_secret = st.text_input(label = ":blue[OpenAI API key]",
153
+ placeholder = "Paste your openAI API key, sk-",
154
+ type = "password")
155
  st.write('To obtain an API Key you must create an OpenAI account at the following link: https://openai.com/api/')
156
  if 'generated' not in st.session_state:
157
  st.session_state['generated'] = []