|
import gradio as gr |
|
import os |
|
from langchain.chains import RetrievalQA |
|
from langchain.llms import OpenAI |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain.document_loaders import DirectoryLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.vectorstores import Chroma |
|
|
|
from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper, ServiceContext |
|
|
|
|
|
import sys |
|
|
|
|
|
|
|
# Directory that holds the source documents; it is both indexed at start-up
# and scanned for PDFs by qa_system().
dir_path = "./docs"

# Fail fast with an actionable message when no API key is configured.
# The previous bare `os.environ["OPENAI_API_KEY"]` lookup had the same effect
# (a KeyError on a missing variable) but gave no hint about what to do.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "starting this script"
    )

# Create the documents directory if needed so later reads do not fail on a
# missing path.
os.makedirs(dir_path, exist_ok=True)
|
|
|
def construct_index(directory_path, index_path='index.json'):
    """Build a vector index over a directory of documents and save it to disk.

    Args:
        directory_path: Folder passed to ``SimpleDirectoryReader`` to load
            the documents.
        index_path: File the finished index is written to with
            ``save_to_disk``. Defaults to ``'index.json'``, the file that
            ``chatbot`` loads.

    Returns:
        The ``GPTSimpleVectorIndex`` built from the loaded documents.
    """
    # Prompt-sizing settings handed to PromptHelper.
    max_input_size = 4096
    num_outputs = 512  # also used as max_tokens for the LLM below
    max_chunk_overlap = 20
    chunk_size_limit = 600

    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )

    llm = OpenAI(
        temperature=0,
        model_name="gpt-3.5-turbo",
        max_tokens=num_outputs,
    )
    llm_predictor = LLMPredictor(llm=llm)

    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )

    documents = SimpleDirectoryReader(directory_path).load_data()
    index = GPTSimpleVectorIndex.from_documents(
        documents,
        service_context=service_context,
    )

    # Persist the index so it can be reloaded without re-reading the documents.
    index.save_to_disk(index_path)
    return index
|
|
|
# Maps the index file name to (file signature, loaded index) so chatbot() does
# not re-parse the same file on every question.
_CHATBOT_INDEX_CACHE = {}


def chatbot(input_text):
    """Answer a question against the index stored in ``'index.json'``.

    The index is loaded from disk on first use and kept in memory. It is
    reloaded only when the file's modification time or size changes, so a
    rebuilt index is still picked up.

    Args:
        input_text: The question to run against the index.

    Returns:
        The ``response`` attribute of the query result.

    Raises:
        FileNotFoundError: If ``'index.json'`` does not exist.
    """
    index_file = 'index.json'

    # Cheap freshness check: a changed mtime or size means the file was rewritten.
    stat = os.stat(index_file)
    signature = (stat.st_mtime_ns, stat.st_size)

    cached = _CHATBOT_INDEX_CACHE.get(index_file)
    if cached is None or cached[0] != signature:
        # NOTE(review): no service_context is passed here, so presumably the
        # library defaults are used at query time rather than the predictor
        # configured in construct_index -- confirm this is intended.
        loaded = GPTSimpleVectorIndex.load_from_disk(index_file)
        cached = (signature, loaded)
        _CHATBOT_INDEX_CACHE[index_file] = cached

    response = cached[1].query(input_text, response_mode="compact")
    return response.response
|
|
|
def qa_system(pdf_file, openai_key, prompt, chain_type, k):
    """Answer a question over the PDFs in ``dir_path`` with a retrieval QA chain.

    Args:
        pdf_file: Not used by this function; documents are always loaded
            from ``dir_path``. Kept so existing callers keep working.
        openai_key: API key written to the ``OPENAI_API_KEY`` environment
            variable. NOTE(review): this is process-wide, so it affects
            every later call in the same process.
        prompt: The question passed to the chain as ``"query"``.
        chain_type: Chain type forwarded to ``RetrievalQA.from_chain_type``.
        k: Number of chunks to retrieve. Coerced to an integer of at
            least 1, because UI widgets such as sliders may deliver a float.

    Returns:
        A tuple ``(answer, sources)`` where ``answer`` is the chain's
        ``'result'`` and ``sources`` is the list of ``page_content`` strings
        from the returned source documents.
    """
    os.environ["OPENAI_API_KEY"] = openai_key

    # The retriever's k is a count; normalise float or zero values up front
    # instead of letting them reach the vector store.
    k = max(1, int(k))

    # Load every PDF below the documents directory.
    loader = DirectoryLoader(dir_path, glob="**/*.pdf")
    documents = loader.load()

    # Split the documents into chunks with no overlap.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and store them in a Chroma vector store.
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)

    # Expose the store as a similarity retriever returning k chunks.
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
    )

    result = qa({"query": prompt})
    sources = [doc.page_content for doc in result["source_documents"]]
    return result['result'], sources
|
|
|
|
|
|
|
# Build the index at start-up; construct_index() also writes it to disk,
# which is where chatbot() reads it from.
index = construct_index(dir_path)

# Input widgets. Only `prompt` is wired into the interface below; the others
# match qa_system()'s parameters but are not attached to anything here.
openai_key = gr.inputs.Textbox(
    label="OpenAI API Key",
    type="password",
)
prompt = gr.inputs.Textbox(label="Question Prompt")
chain_type = gr.inputs.Radio(
    ['stuff', 'map_reduce', "refine", "map_rerank"],
    label="Chain Type",
)
k = gr.inputs.Slider(
    minimum=1,
    maximum=5,
    default=1,
    label="Number of Relevant Chunks",
)

# Output widgets; defined but not used by the interface below.
output_text = gr.outputs.Textbox(label="Answer")
output_docs = gr.outputs.Textbox(label="Relevant Source Text")

# Single-textbox UI backed by chatbot(); launched in debug mode.
_interface = gr.Interface(
    fn=chatbot,
    inputs=prompt,
    outputs="text",
    title="TKO GPT for URDs - experimental",
    description="Tikehau URDs.",
)
_interface.launch(debug=True)
|
|