File size: 7,830 Bytes
ad8fefa
 
 
 
 
 
 
2285996
ad8fefa
2285996
ad8fefa
 
 
 
 
 
 
 
a9a8aac
f0bc773
 
 
ad8fefa
 
 
 
 
 
 
 
2285996
 
ad8fefa
 
 
 
 
2285996
 
ad8fefa
 
 
 
 
 
 
 
 
a9a8aac
 
2285996
 
 
 
 
 
 
 
 
 
 
 
ad8fefa
 
2285996
ad8fefa
 
 
f0bc773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad8fefa
 
f0bc773
 
 
 
 
 
2285996
f0bc773
 
 
2285996
 
 
ad8fefa
2285996
 
 
ad8fefa
 
2285996
ad8fefa
 
 
2285996
ad8fefa
 
 
2285996
 
ad8fefa
2285996
ad8fefa
 
 
 
 
 
 
2285996
 
 
 
ad8fefa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2285996
 
 
ad8fefa
 
 
 
 
 
 
 
 
 
 
2285996
 
 
 
ad8fefa
 
 
 
 
 
 
 
 
 
a9a8aac
ad8fefa
 
 
2285996
 
 
 
 
f0bc773
ad8fefa
 
 
 
 
 
 
 
 
 
 
2285996
 
ad8fefa
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import os

import openai
import streamlit as st
from streamlit_chat import message

from config import output_path_video, output_path_transcription
from keyword_retriever.keyword_retreiver import MediaRetriever
from logger import logger
from resource_loader.uploaded_media_loader import UploadedMediaLoader
from resource_loader.youtube_loader import YouTubeLoader
from summarization_service.summarizer import TranscriptSummary
from utils import check_file_exists, download_video, transcribe_video, load_transcription

# Configure the browser tab title and use the wide page layout.
# NOTE(review): this is the first Streamlit call in the script — presumably
# intentional, since page config is normally expected before other st.* calls.
st.set_page_config(page_title="Summary", layout="wide")

# Initialize chat history
# NOTE(review): this module-level list is not referenced again in this file;
# the chat rendering further down reads st.session_state instead.
chat_history = []

# Initialize variables for LLM options and chosen LLM
# Both are placeholders only: the sidebar block reassigns them on every run.
llm_options = []
chosen_LLM = "default"


def generate_response(prompt_input):
    """Answer a chat prompt using the active transcript summary.

    Reads the module-level ``transcript_summary`` object, which this script
    assigns only after a media source has been selected, so the function must
    not be called before that point.

    Args:
        prompt_input: The question text forwarded to ``query_summary``.

    Returns:
        Whatever ``transcript_summary.query_summary`` returns for the prompt.
    """
    return transcript_summary.query_summary(prompt_input)


@st.cache_resource()
def factory_transcript(media_id, model, llm_provider):
    """Build the ``TranscriptSummary`` for one media item.

    The function is wrapped in ``st.cache_resource``, so Streamlit is expected
    to hand back the same instance for repeated calls with identical
    arguments instead of constructing a new one on every rerun.

    Args:
        media_id: Identifier of the media, passed on as ``doc_id``.
        model: Name of the language model to use.
        llm_provider: Name of the provider serving that model.

    Returns:
        The constructed ``TranscriptSummary`` instance.
    """
    summary = TranscriptSummary(
        llm_provider=llm_provider,
        model=model,
        doc_id=media_id,
    )
    logger.info("TranscriptSummary initialized")
    return summary


@st.cache_resource()
def factory_media(media_id, top_k):
    """Build the ``MediaRetriever`` used for transcript search.

    The function is wrapped in ``st.cache_resource``, so Streamlit is expected
    to reuse the instance for repeated calls with the same ``media_id`` and
    ``top_k``.

    Args:
        media_id: Identifier of the media whose transcript is searched.
        top_k: Passed on as ``similarity_top_k`` (number of results to fetch).

    Returns:
        The constructed ``MediaRetriever`` instance.
    """
    media_retriever = MediaRetriever(similarity_top_k=top_k, media_id=media_id)
    logger.info("video_retriever initialized")
    return media_retriever


with st.sidebar:
    # Sidebar: media source, search depth, and provider/model/credential settings.
    st.title("Controls")
    youtube_url = st.text_input("Enter YouTube URL:")

    # Single source of truth for accepted extensions: the same two lists feed
    # both the uploader's filter and the video/audio classification below.
    video_extensions = ['mp4', 'mov', 'avi', 'flv', 'mkv']
    audio_extensions = ['mp3', 'wav', 'aac', 'ogg']
    uploaded_file = st.file_uploader("Or upload a video...",
                                     type=video_extensions + audio_extensions)

    # Explicit default so the name always exists, even when no source was given.
    media_loader = None

    if uploaded_file is not None:
        # An upload takes priority over a YouTube URL when both are supplied.
        # Lower-case the extension so names such as "clip.MP4" are classified
        # correctly instead of falling through to 'unknown'.
        file_extension = uploaded_file.name.rsplit('.', 1)[-1].lower()

        if file_extension in video_extensions:
            media_type = 'video'
        elif file_extension in audio_extensions:
            media_type = 'audio'
        else:
            media_type = 'unknown'

        media_loader = UploadedMediaLoader(uploaded_file, uploaded_file.name, media_type=media_type)

    elif youtube_url:
        media_loader = YouTubeLoader(youtube_url, output_path_video)

    similarity_top_k = st.number_input("Maximum Number of Results to Display", min_value=1, max_value=100, value=10)

    # Selecting the provider
    chosen_provider = st.selectbox("Choose Provider", ["OpenAI", "Replicate", "Default"])

    # Based on provider, display relevant LLMs
    if chosen_provider == "OpenAI":
        llm_options = ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k-0314"]
    elif chosen_provider == "Replicate":
        llm_options = ["mistralai/mistral-7b-v0.1:3e8a0fb6d7812ce30701ba597e5080689bef8a013e5c6a724fafb108cc2426a0",
                       "mistralai/mistral-7b-instruct-v0.1:83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70",
                       "joehoover/zephyr-7b-alpha:14ec63365a1141134c41b652fe798633f48b1fd28b356725c4d8842a0ac151ee",
                       "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
                       "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
                       "meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e", ]
    else:
        llm_options = ["default"]

    # Pick one of the models offered for the chosen provider.
    chosen_LLM = st.selectbox("Type or Choose Language Model", llm_options)

    # NOTE(review): the label says "OpenAI", but the script also uses this
    # value as the Replicate token when the Replicate provider is selected.
    api_key = st.text_input("OpenAI API Key", type="password")

# Route the credential entered in the sidebar to the selected provider.
# Without a key (or with the "Default" provider) the script falls back to the
# default model/provider pair regardless of what was picked in the sidebar.
if api_key and chosen_provider == "OpenAI":
    logger.info("OpenAI API KEY")
    try:
        # This only stores the key; nothing here validates it.
        openai.api_key = api_key
    except Exception:
        # Narrowed from a bare ``except:`` so that BaseException subclasses
        # (KeyboardInterrupt, SystemExit, ...) are no longer swallowed.
        st.sidebar.write("Incorrect API key provided")
elif api_key and chosen_provider == "Replicate":
    logger.info("Replicate API KEY")

    # Exposed through the environment variable set here.
    os.environ['REPLICATE_API_TOKEN'] = api_key
else:
    chosen_LLM = "default"
    chosen_provider = "Default"

# Main page: runs only once a media source (YouTube URL or upload) is chosen.
if youtube_url or uploaded_file:
    video_file_path = os.path.join(output_path_video, f"{media_loader.media_id}.mp3")
    transcription_file_path = os.path.join(output_path_transcription, f"{media_loader.media_id}.json")

    # Download and transcribe only when the expected output files are missing.
    if not check_file_exists(video_file_path):
        download_video(media_loader)
    else:
        logger.info(f"Video already downloaded: {video_file_path}")
    if not check_file_exists(transcription_file_path):
        transcribe_video(media_loader, output_path_video, output_path_transcription)
    else:
        logger.info(f"Transcription already exists: {transcription_file_path}")

    video_retriever = factory_media(media_loader.media_id, top_k=int(similarity_top_k))
    transcript_summary = factory_transcript(media_loader.media_id, model=chosen_LLM, llm_provider=chosen_provider)

    # NOTE(review): ``docs`` is not used below; the call is kept in case
    # load_transcription has side effects — confirm before removing.
    docs = load_transcription(media_loader, output_path_transcription)

    col2, col3 = st.columns([3, 1])

    # Main Content - Middle Section
    # Placeholder so the player can be redrawn later (e.g. at a new timestamp).
    video_slot = col2.empty()

    # Decide once which object is handed to the player.
    if isinstance(media_loader, UploadedMediaLoader):
        media_source = uploaded_file
    elif isinstance(media_loader, YouTubeLoader):
        media_source = youtube_url
    else:
        media_source = None


    def show_media(**video_kwargs):
        """Draw the selected media into ``video_slot``; no-op for unknown loaders."""
        if media_source is not None:
            video_slot.video(media_source, **video_kwargs)


    with col2:
        show_media()

        st.title("Summary")
        # Display summary here
        st.write(transcript_summary.get_document_summary())
        # Initialize session_state for chat history if it doesn't exist
        if 'chat_history' not in st.session_state:
            st.session_state.chat_history = []

        # Main Content - Bottom Section for Chat
        st.title("Ask me")
    with col3:
        user_input = st.text_input("Search:")
        if user_input:
            # Re-issued here as in the original flow.
            # NOTE(review): likely redundant with the draw above.
            show_media()

            raw_results = video_retriever.search(user_input)
            for i, result in enumerate(raw_results):
                text_content = result.node.text
                start_time = int(result.node.metadata['start'])

                # One button per hit; clicking it seeks the player to the hit.
                if st.button(text_content, key=f"button_{i}"):
                    if isinstance(media_loader, YouTubeLoader):
                        # Only YouTube sources have a URL to timestamp. Use "?"
                        # when the URL has no query string yet, "&" otherwise.
                        separator = "&" if "?" in youtube_url else "?"
                        st.session_state.current_video = f"{youtube_url}{separator}t={start_time}s"
                    show_media(start_time=start_time)

    with col2:
        chat_placeholder = st.empty()


        def on_btn_click():
            """Clear both chat lists in place."""
            del st.session_state.past[:]
            del st.session_state.generated[:]


        def on_input_change():
            """Answer the submitted chat question and record the exchange."""
            question = st.session_state.user_input
            if not question.strip():
                # Nothing meaningful to ask; skip the query.
                return

            # Generate the answer BEFORE touching the history: if the query
            # raises, neither list is modified, so questions and answers stay
            # index-aligned.
            latest_response = generate_response(question)

            st.session_state.past.append(question)
            st.session_state.generated.append(latest_response)
            st.session_state.user_input = ""  # This will empty the "User Input:" text box


        if 'generated' not in st.session_state:
            st.session_state['generated'] = []
        if 'past' not in st.session_state:
            st.session_state['past'] = []

        with chat_placeholder.container():
            exchanges = zip(st.session_state['past'], st.session_state['generated'])
            for i, (asked, answered) in enumerate(exchanges):
                message(asked, is_user=True, key=f"{i}_user")

                # Displaying generated message
                message(answered, key=f"{i}", allow_html=True, is_table=False)
            st.button("Clear message", on_click=on_btn_click)

        with st.container():
            st.text_input("User Input:", on_change=on_input_change, key="user_input")