Spaces:
Build error
Build error
import os | |
import streamlit as st | |
from gdown import download_folder | |
from llama_index import ServiceContext | |
from llama_index import SimpleDirectoryReader | |
from llama_index import VectorStoreIndex | |
from llama_index import set_global_service_context | |
from llama_index.embeddings import OpenAIEmbedding | |
from llama_index.llms import AzureOpenAI | |
def download_test_data():
    """Fetch the Google Drive test-data folder into ``./data/``.

    A Streamlit spinner is displayed for the duration of the download.
    """
    drive_folder = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
    spinner_message = "Downloading test data. This might take a minute."
    with st.spinner(text=spinner_message):
        # TODO: replace the gdown solution with a custom one compatible with GitHub,
        # and use st.progress to report download progress in more detail.
        download_folder(
            url=drive_folder,
            output="./data/",
            quiet=False,
            use_cookies=False,
        )
def load_data() -> "VectorStoreIndex":
    """Load the documents under ``./data`` and build a vector store index.

    Steps, each wrapped in its own Streamlit spinner:

    1. Read every document below ``./data`` (recursively).
    2. Configure the Azure OpenAI chat model.
    3. Configure the OpenAI embedding model (Azure deployment).
    4. Register both in a global service context and index the documents.

    Environment variables read: ``ENGINE``, ``ENGINE_EMBEDDING``,
    ``OPENAI_API_KEY``, ``OPENAI_API_BASE`` and ``OPENAI_API_VERSION``.

    Returns:
        The ``VectorStoreIndex`` built from the loaded documents.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
        documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()

    with st.spinner(text="Setting up Azure OpenAI..."):
        llm = AzureOpenAI(
            model="gpt-3.5-turbo",
            engine=os.environ["ENGINE"],
            temperature=0.5,
            api_key=os.environ["OPENAI_API_KEY"],
            api_base=os.environ["OPENAI_API_BASE"],
            api_type="azure",
            api_version=os.environ["OPENAI_API_VERSION"],
            # Adjacent string literals are concatenated verbatim, so every
            # fragment but the last must end with an explicit space.
            system_prompt=(
                "You are an expert on André's research and your job is to answer "
                "technical questions. Assume that all questions are related to "
                "André's research. Keep your answers technical and based on facts; "
                "do not hallucinate features."
            ),
        )

    with st.spinner(text="Setting up OpenAI Embedding..."):
        # You need to deploy your own embedding model as well as your own
        # chat completion model.
        embed_model = OpenAIEmbedding(
            model="text-embedding-ada-002",
            deployment_name=os.environ["ENGINE_EMBEDDING"],
            api_key=os.environ["OPENAI_API_KEY"],
            api_base=os.environ["OPENAI_API_BASE"],
            api_type="azure",
            api_version=os.environ["OPENAI_API_VERSION"],
            # Low batch size to stay under the rate limit; this may make
            # indexing slower.
            embed_batch_size=10,
        )

    with st.spinner(text="Setting up Vector Store Index..."):
        # chunk_size is left at the library default.
        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
        # Registered globally, so no explicit service_context is passed to
        # from_documents below.
        set_global_service_context(service_context)
        index = VectorStoreIndex.from_documents(documents)

    return index