Spaces:
Sleeping
Sleeping
NickNYU
[bugfix] Fix the cut-off issue caused by the LLM prediction token limit (256 by default in the OpenAI Python library) by setting the temperature to 0 and switching the LLM prediction method from compact-refine to refine
bd59653
from llama_index import StorageContext | |
from typing import List | |
from abc import abstractmethod, ABC | |
from llama_index import Document | |
from core.lifecycle import Lifecycle | |
from llama.service_context import ServiceContextManager | |
class StorageContextManager(Lifecycle, ABC):
    """Abstract lifecycle component that exposes a llama_index ``StorageContext``.

    Concrete subclasses decide where the storage context comes from and must
    implement :meth:`get_storage_context`.
    """

    @abstractmethod
    def get_storage_context(self) -> StorageContext:
        """Return the ``StorageContext`` managed by this component.

        Declared abstract so that a subclass which forgets to override it
        cannot be instantiated, instead of silently returning ``None``.
        """
class LocalStorageContextManager(StorageContextManager):
    """Storage context manager that keeps its data in a local directory.

    During ``do_init`` the storage context is either loaded from
    ``dataset_path`` (when ``is_local_storage_files_ready`` reports the files
    are present) or built from scratch: documents are fetched with
    ``GithubLoader``, indexed with ``GPTVectorStoreIndex`` and persisted to
    ``dataset_path``.
    """

    # Assigned by do_init() (directly or through _indexing()); the attribute
    # does not exist on the instance before that.
    storage_context: StorageContext

    def __init__(
        self,
        service_context_manager: ServiceContextManager,
        dataset_path: str = "./dataset",
    ) -> None:
        """Store the collaborators; no storage is loaded or built here.

        Args:
            service_context_manager: Provides the service context passed to
                the index when documents have to be (re)indexed.
            dataset_path: Directory the storage context is loaded from and
                persisted to.
        """
        super().__init__()
        self.dataset_path = dataset_path
        self.service_context_manager = service_context_manager

    def get_storage_context(self) -> StorageContext:
        """Return the storage context created by ``do_init``.

        Raises:
            AttributeError: If called before ``do_init`` has assigned
                ``storage_context``.
        """
        return self.storage_context

    def do_init(self) -> None:
        """Load the persisted storage context, or build and persist a new one."""
        # Function-scope import kept from the original code
        # (presumably to avoid an import cycle - TODO confirm).
        from llama.utils import is_local_storage_files_ready

        if is_local_storage_files_ready(self.dataset_path):
            self.storage_context = StorageContext.from_defaults(
                persist_dir=self.dataset_path
            )
        else:
            docs = self._download()
            self._indexing(docs)

    def do_start(self) -> None:
        """Nothing to do when the component starts."""

    def do_stop(self) -> None:
        """Nothing to do when the component stops."""

    def do_dispose(self) -> None:
        """Persist the storage context to ``dataset_path``, if one exists."""
        # do_init() may never have run (or may have failed), in which case
        # there is nothing to persist; skipping avoids an AttributeError
        # during teardown.
        storage_context = getattr(self, "storage_context", None)
        if storage_context is None:
            return
        storage_context.persist(persist_dir=self.dataset_path)

    def _download(self) -> List[Document]:
        """Fetch the source documents with ``GithubLoader``."""
        from llama.data_loader import GithubLoader

        loader = GithubLoader()
        return loader.load()

    def _indexing(self, docs: List[Document]) -> None:
        """Index ``docs``, persist the result and keep its storage context.

        Args:
            docs: Documents to build the vector store index from.
        """
        from llama_index import GPTVectorStoreIndex

        index = GPTVectorStoreIndex.from_documents(
            docs, service_context=self.service_context_manager.get_service_context()
        )
        index.storage_context.persist(persist_dir=self.dataset_path)
        self.storage_context = index.storage_context