Spaces:
Runtime error
Runtime error
# Reference: https://github.com/jerryjliu/llama_index/issues/1900
from llama_index import ( | |
SimpleDirectoryReader, | |
LLMPredictor, | |
ServiceContext, | |
GPTVectorStoreIndex, | |
PromptHelper, | |
load_index_from_storage, | |
StorageContext, | |
) | |
from langchain.chat_models import ChatOpenAI | |
import gradio as gr | |
import openai | |
import os | |
from pathlib import Path | |
# The API key is read from the environment; a missing variable raises
# KeyError at import time.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Directory that contains this script.
BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))

# File extension targeted when collecting documents (e.g. ".txt").
target_extension = ".pdf"

# Sizing parameters handed to PromptHelper below.
max_input_size = 4096  # maximum input size
num_outputs = 512  # number of output tokens
max_chunk_overlap = 0.20  # maximum chunk overlap
chunk_size_limit = 600  # chunk size limit

# NOTE(review): prompt_helper does not appear to be referenced by any live
# code in the visible part of this file -- confirm whether it should be
# passed to the service context.
prompt_helper = PromptHelper(
    max_input_size,
    num_outputs,
    max_chunk_overlap,
    chunk_size_limit=chunk_size_limit,
)

# Chat model and the predictor wrapper built around it.
llm = ChatOpenAI(
    temperature=0.7,
    model_name="gpt-3.5-turbo",
    max_tokens=num_outputs,
)
llm_predictor = LLMPredictor(llm=llm)
def get_files_with_extension(directory_path, extension=None):
    """Recursively collect files under *directory_path* with a given extension.

    Args:
        directory_path: Root directory to walk (string or path-like).
        extension: Suffix to match, e.g. ``".pdf"``. When omitted, the
            module-level ``target_extension`` is used. Matching ignores
            case, so ``report.PDF`` is found when looking for ``".pdf"``.

    Returns:
        A sorted list of matching file paths (strings). The list is empty
        when nothing matches.
    """
    if extension is None:
        extension = target_extension
    suffix = extension.lower()

    files_list = []
    for root, _, files in os.walk(directory_path):
        files_list.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(suffix)
        )

    # os.walk yields entries in filesystem order; sort so callers get a
    # stable, reproducible document order.
    files_list.sort()
    return files_list
def construct_index(folder_name):
    """Build a vector index from the documents of a folder and persist it.

    Documents are read from ``<BASE_DIR>/<folder_name>/docs`` (created if it
    does not exist yet), indexed with the module-level ``llm_predictor``,
    and the index storage is written to ``<folder_name>/storage``.

    Args:
        folder_name: Folder (relative to ``BASE_DIR``) whose ``docs``
            sub-directory holds the source documents.

    Returns:
        The newly built ``GPTVectorStoreIndex``.

    Raises:
        ValueError: If the docs directory contains no matching files.
    """
    docs_path = BASE_DIR / folder_name / "docs"
    docs_path.mkdir(parents=True, exist_ok=True)

    files_list = get_files_with_extension(docs_path)
    if not files_list:
        # Fail early with an actionable message instead of handing an empty
        # file list to the reader.
        raise ValueError(
            f"No '{target_extension}' files found under {docs_path}; "
            "nothing to index."
        )

    documents = SimpleDirectoryReader(input_files=files_list).load_data()
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=service_context
    )

    # Relative path: resolved against the current working directory, not
    # BASE_DIR.
    index.storage_context.persist(persist_dir=f"{folder_name}/storage")
    return index
def main(folder_name):
    """Index a folder's documents and serve a Gradio chat UI over them.

    Args:
        folder_name: Folder (relative to ``BASE_DIR``) whose ``docs``
            sub-directory holds the documents to index.
    """
    docs_path = BASE_DIR / folder_name / "docs"
    docs_path.mkdir(parents=True, exist_ok=True)

    # Build the index once at startup and answer every request from it.
    # Reloading from storage per request repeated the disk I/O and dropped
    # the service context (and with it the configured LLM) used to build
    # the index.
    index = construct_index(folder_name)

    def chatbot(input_text):
        """Query the index with *input_text* and return the answer text."""
        print("chatbot")
        response = index.as_query_engine().query(input_text)
        print(response)
        return response.response

    iface = gr.Interface(
        fn=chatbot,
        inputs=gr.components.Textbox(lines=7, label="Enter your text"),
        outputs="text",
        title="Custom-trained AI Chatbot",
    )
    print("Launcing")
    iface.launch()
if __name__ == "__main__":
    import sys

    # The first command-line argument selects the folder; fall back to
    # "tax" when none is given.
    folder_name = sys.argv[1] if len(sys.argv) > 1 else "tax"

    # Refuse to start when the folder is missing next to this script.
    DOCS_PATH = BASE_DIR / folder_name
    if not DOCS_PATH.exists():
        raise Exception("folder does not exist.")

    main(folder_name)