# pip install google-cloud-aiplatform
# pip install langchain
# pip install -U langchain-google-vertexai
# pip install chromadb
# pip install pytube
# pip install youtube-transcript-api
# pip install gradio

from google.cloud import aiplatform

# Authenticate first when running in Colab:
# from google.colab import auth as google_auth
# google_auth.authenticate_user()

import vertexai

PROJECT_ID = "ace-hydra-404205"  # enter your project id here
vertexai.init(project=PROJECT_ID)

# Note: on newer LangChain versions these loaders/stores live in langchain_community
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# from langchain.llms import VertexAI  # old import path, superseded below
from langchain_google_vertexai import VertexAI

llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

from langchain_google_vertexai import VertexAIEmbeddings

# Embedding: throttle requests to stay under the Vertex AI quota
EMBEDDING_QPM = 100
EMBEDDING_NUM_BATCH = 5
embeddings = VertexAIEmbeddings(
    model_name="textembedding-gecko@001",  # keyword must be lowercase model_name
    requests_per_minute=EMBEDDING_QPM,
    num_instances_per_batch=EMBEDDING_NUM_BATCH,
)

# Pull the transcript (plus title/description metadata) from YouTube
loader = YoutubeLoader.from_youtube_url(
    "https://youtu.be/kqtD5dpn9C8?si=Jcstba4CpdN5B2wN", add_video_info=True
)
result = loader.load()

# Chunk the transcripts into ~1500-character pieces
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
docs = text_splitter.split_documents(result)
# print(f"# of documents = {len(docs)}")  # 34

db = Chroma.from_documents(docs, embeddings)  # stores embeddings in ChromaDB
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# Chain object: "stuff" the retrieved chunks into a single prompt
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


def sm_ask(question, print_results=True):
    video_subset = qa({"query": question})
    # The full chain output (answer + source documents) is reused as context
    context = video_subset
    prompt = f"""
    Answer the following question in a detailed manner, using information from the text below.
    If the answer is not in the text, say "I don't know" and do not generate your own response.

    Question: {question}
    Text: {context}

    Answer:
    """
    parameters = {
        "temperature": 0.1,
        "max_output_tokens": 256,
        "top_p": 0.8,
        "top_k": 40,
    }
    response = llm.predict(prompt, **parameters)
    return {"answer": response}


import gradio as gr


def get_response(input_text):
    response = sm_ask(input_text)
    return response


grapp = gr.Interface(fn=get_response, inputs="text", outputs="text")
grapp.launch()
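
# A minimal sanity check of the QA pipeline without the Gradio UI
# (run before grapp.launch(), which blocks; the question below is
# hypothetical -- substitute one relevant to the loaded video):
# print(sm_ask("What is the main topic of the video?")["answer"])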