import gradio as gr
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain import PromptTemplate
import re
import pandas as pd
from langchain.vectorstores import FAISS
import requests
from typing import List
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.llms.base import LLM
from typing import Optional, List, Mapping, Any
import ast
from utils import ClaudeLLM
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

# Two embedding backends so the two Gradio tabs can compare retrieval quality:
# the HuggingFaceEmbeddings default model vs. BAAI/bge-large-en-v1.5.
embeddings = HuggingFaceEmbeddings()
embeddings_1 = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

db_art = FAISS.load_local('db_art', embeddings)
db_art_1 = FAISS.load_local('db_art_1', embeddings_1)
# db_yt = FAISS.load_local('db_yt', embeddings)

# Cache of the last non-empty retrieval; used as a fallback when a new
# query returns nothing.
mp_docs = {}


def retrieve_thoughts(query, n, db):
    """Score every chunk in *db* against *query* and group results per article.

    Parameters
    ----------
    query : str
        Free-text user query.
    n : int
        Unused; kept only so existing callers' positional arguments still work.
    db : FAISS
        Vector store whose documents carry ``_id``, ``id``, ``title``,
        ``author`` and ``url`` metadata (assumed — TODO confirm against the
        index-building code).

    Returns
    -------
    dict
        ``{'tier 1': DataFrame}`` — one row per article, at most the 10 best
        articles ranked by the median score of their matching chunks, each row
        carrying the article's chunks in reading order.
    """
    # Retrieve scores for the ENTIRE index (k == index size) so that articles
    # can be ranked by median chunk score rather than by their single best chunk.
    k_all = len(db.index_to_docstore_id.values())
    docs_with_score = db.similarity_search_with_score(query=query, k=k_all, fetch_k=k_all)

    # One row per chunk: metadata columns + page_content + score.
    df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score])
    df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score],
                                     columns=['page_content'])), axis=1)
    df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score],
                                     columns=['score'])), axis=1)
    df['_id'] = df['_id'].apply(lambda x: str(x))
    df.sort_values("score", inplace=True)

    # TO-DO: What if user query doesn't match what we provide as documents
    # Keep only reasonably close chunks (FAISS returns a distance: lower is better).
    tier_1 = df[df['score'] < 1]

    # Per-article dict of its surviving chunks, ordered by in-article position (`id`).
    chunks_1 = tier_1.groupby(['_id']).apply(
        lambda x: {f"chunk_{i}": row
                   for i, row in enumerate(
                       x.sort_values('id')[['id', 'score', 'page_content']].to_dict('records'))}
    ).values

    # Collapse to one row per article, then attach reference number and chunks.
    tier_1_adjusted = tier_1.groupby(['_id']).first().reset_index()[['_id', 'title', 'author', 'url', 'score']]
    tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1)
    tier_1_adjusted['chunks'] = list(chunks_1)

    # Re-rank articles by the median score of their chunks; keep the top 10.
    score = tier_1.groupby(['_id']).apply(lambda x: x['score'].median()).values
    tier_1_adjusted['score'] = list(score)
    tier_1_adjusted.sort_values("score", inplace=True)
    tier_1_adjusted = tier_1_adjusted[:min(len(tier_1_adjusted), 10)]

    return {'tier 1': tier_1_adjusted, }


def _qa_retrieve(query, db):
    """Shared body of the qa_retrieve_* endpoints: retrieve, cache, format.

    Factored out because the three public endpoints were byte-identical
    except for which global vector store they read.
    """
    global mp_docs
    thoughts = retrieve_thoughts(query, 0, db)
    if not(thoughts):
        # Empty retrieval: fall back to the last successful one, if any.
        if mp_docs:
            thoughts = mp_docs
    else:
        mp_docs = thoughts
    tier_1 = thoughts['tier 1']
    reference = tier_1[['_id', 'url', 'author', 'title', 'chunks', 'score']].to_dict('records')
    return {'Reference': reference}


def qa_retrieve_art(query,):
    """Gradio endpoint: retrieve articles using the default (mpnet) embeddings."""
    return _qa_retrieve(query, db_art)


def qa_retrieve_bge(query,):
    """Gradio endpoint: retrieve articles using the bge-large-en-v1.5 embeddings."""
    return _qa_retrieve(query, db_art_1)


def qa_retrieve_yt(query,):
    """Gradio endpoint for the YouTube index.

    NOTE(review): ``db_yt`` is never loaded — its ``FAISS.load_local`` call
    above is commented out — so calling this raises NameError, exactly as the
    original code did. Re-enable the load before wiring this into the UI.
    """
    return _qa_retrieve(query, db_yt)


def flush():
    """Placeholder used to clear UI state; returns None."""
    return None


ref_art_1 = gr.Interface(fn=qa_retrieve_bge, label="bge Articles",
                         inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
                         outputs=gr.components.JSON(label="articles"))

ref_art = gr.Interface(fn=qa_retrieve_art, label="mpnet Articles",
                       inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
                       outputs=gr.components.JSON(label="articles"))

# ref_yt = gr.Interface(fn=qa_retrieve_yt, label="Youtube",
#                       inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
#                       outputs=gr.components.JSON(label="youtube"), title="youtube", examples=examples)

# Run both retrieval backends side by side on the same query.
demo = gr.Parallel(ref_art_1, ref_art)

demo.launch()