"""Gradio app that answers questions about a YouTube video using its transcript.

The transcript is loaded with LangChain's ``YoutubeLoader``, split into chunks,
embedded into a Chroma vector store, and queried through a ``RetrievalQA``
chain backed by a model hosted on the Hugging Face Hub.
"""

import logging
import os
import uuid
from functools import lru_cache
from typing import Optional
from urllib.parse import parse_qs, urlparse

import gradio as gr
import torch
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

logger = logging.getLogger(__name__)

EMBEDDING_MODEL_NAME = "BAAI/bge-base-en"
LLM_REPO_ID = "tiiuae/falcon-7b-instruct"

# URL path prefixes whose *next* path segment is the video ID,
# e.g. https://www.youtube.com/shorts/<id>.
_PATH_ID_PREFIXES = ("embed", "shorts", "live", "v")


def extract_video_id(youtube_url: Optional[str]) -> Optional[str]:
    """Return the video ID contained in *youtube_url*, or ``None``.

    Recognised forms:

    * any URL carrying a non-empty ``v`` query parameter (``watch?v=<id>``);
    * ``youtu.be/<id>`` short links;
    * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``
      (also on ``youtube-nocookie.com`` and on sub-domains such as ``m.``).

    A missing scheme (``youtube.com/watch?v=...``) is tolerated.

    Args:
        youtube_url: The URL text entered by the user.

    Returns:
        The video ID, or ``None`` when the input is empty, is not a string,
        cannot be parsed, or contains no recognisable ID. An error is never
        returned as a string, so a truthy result is always an ID candidate.
    """
    if not youtube_url or not isinstance(youtube_url, str):
        return None

    url = youtube_url.strip()
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given: re-parse so the host lands in netloc, not path.
            parsed = urlparse("//" + url)
        host = parsed.hostname or ""
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return None

    # The query parameter wins regardless of host; this is the original rule.
    video_id = parse_qs(parsed.query).get("v", [None])[0]
    if video_id:
        return video_id

    if host.startswith("www."):
        host = host[len("www."):]
    segments = [segment for segment in parsed.path.split("/") if segment]
    if not segments:
        return None

    if host == "youtu.be":
        return segments[0]

    is_youtube_host = any(
        host == domain or host.endswith("." + domain)
        for domain in ("youtube.com", "youtube-nocookie.com")
    )
    if is_youtube_host and len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
        return segments[1]

    return None


@lru_cache(maxsize=1)
def _get_embeddings():
    """Build the BGE embedding model once and reuse it for every request."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return HuggingFaceBgeEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
    )


def process_video(youtube_url: str, question: str) -> str:
    """Answer *question* using the transcript of the video at *youtube_url*.

    Args:
        youtube_url: URL of the YouTube video.
        question: Natural-language question about the video.

    Returns:
        The answer text produced by the QA chain, or a human-readable error
        message. Failures are returned as text rather than raised because the
        return value is displayed directly in the Gradio output box.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return 'Invalid YouTube URL'
    if not question or not question.strip():
        return 'Please enter a question'

    # Check the token up front so a missing variable yields a clear message
    # before any transcript download or embedding work is done.
    api_token = os.environ.get('HUGGINGFACE_API_TOKEN')
    if not api_token:
        return (
            "Error processing video: "
            "the HUGGINGFACE_API_TOKEN environment variable is not set"
        )

    vectordb = None
    try:
        # Load the transcript.
        documents = YoutubeLoader(video_id).load()
        if not documents:
            return "Error processing video: no transcript available"

        # Split the transcript into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=100
        )
        chunks = text_splitter.split_documents(documents)

        # Use a unique collection per request. Otherwise every call writes to
        # Chroma's default collection, and when the in-process client is
        # shared, chunks from earlier videos can leak into later retrievals.
        vectordb = Chroma.from_documents(
            chunks,
            embedding=_get_embeddings(),
            collection_name=f"yt-{uuid.uuid4().hex}",
        )

        # Set up the QA chain.
        qa_chain = RetrievalQA.from_chain_type(
            llm=HuggingFaceHub(
                huggingfacehub_api_token=api_token,
                repo_id=LLM_REPO_ID,
                model_kwargs={"temperature": 0.1, "max_new_tokens": 1000},
            ),
            retriever=vectordb.as_retriever(),
            return_source_documents=False,
            verbose=False,
        )

        # Ask the question.
        llm_response = qa_chain(question)
        return llm_response['result']
    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message.
        logger.exception("Failed to process video %s", video_id)
        return f"Error processing video: {e}"
    finally:
        if vectordb is not None:
            try:
                vectordb.delete_collection()
            except Exception:
                # Cleanup is best-effort; never mask the real result.
                logger.warning(
                    "Could not delete temporary Chroma collection",
                    exc_info=True,
                )


iface = gr.Interface(
    fn=process_video,
    inputs=["text", "text"],
    outputs="text",
    title="YouTube Video AI Assistant",
    description=(
        "Enter a YouTube URL and a question to get AI-generated answers "
        "based on the video."
    ),
)

if __name__ == "__main__":
    iface.launch(share=True)