# llama/storage_context.py
from abc import ABC, abstractmethod
from typing import List

from llama_index import Document, StorageContext

from core.lifecycle import Lifecycle
from llama.service_context import ServiceContextManager


class StorageContextManager(Lifecycle, ABC):
    """Lifecycle-managed provider of a llama_index StorageContext."""

    @abstractmethod
    def get_storage_context(self) -> StorageContext:
        pass


class LocalStorageContextManager(StorageContextManager):
    """StorageContextManager backed by the local filesystem.

    Loads a persisted StorageContext from ``dataset_path`` when one exists;
    otherwise downloads the source documents and builds the index from scratch.
    """

    storage_context: StorageContext

    def __init__(
        self,
        service_context_manager: ServiceContextManager,
        dataset_path: str = "./dataset",
    ) -> None:
        super().__init__()
        self.dataset_path = dataset_path
        self.service_context_manager = service_context_manager
    def get_storage_context(self) -> StorageContext:
        return self.storage_context

    def do_init(self) -> None:
        # Imported lazily here rather than at module top level.
        from llama.utils import is_local_storage_files_ready

        if is_local_storage_files_ready(self.dataset_path):
            # Reuse the index persisted by a previous run.
            self.storage_context = StorageContext.from_defaults(
                persist_dir=self.dataset_path
            )
        else:
            # First run: fetch the documents and build the index from scratch.
            docs = self._download()
            self._indexing(docs)
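
    # A minimal sketch of what llama.utils.is_local_storage_files_ready might
    # check; its real implementation is not shown in this file. Since
    # StorageContext.from_defaults loads the standard llama_index persistence
    # files, a plausible check is for docstore.json under the persist dir:
    #
    #     import os
    #
    #     def is_local_storage_files_ready(persist_dir: str) -> bool:
    #         return os.path.exists(os.path.join(persist_dir, "docstore.json"))
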
    def do_start(self) -> None:
        # Nothing to start; the storage context is prepared in do_init.
        pass

    def do_stop(self) -> None:
        # Nothing to stop; persistence happens in do_dispose.
        pass

    def do_dispose(self) -> None:
        # Flush the (possibly updated) storage context back to disk.
        self.storage_context.persist(persist_dir=self.dataset_path)

    def _download(self) -> List[Document]:
        # Lazy import, mirroring do_init; GithubLoader fetches the source
        # documents from GitHub.
        from llama.data_loader import GithubLoader

        loader = GithubLoader()
        return loader.load()

    def _indexing(self, docs: List[Document]) -> None:
        from llama_index import GPTVectorStoreIndex

        # Build a vector index over the documents and persist it so the next
        # run can load it via StorageContext.from_defaults.
        index = GPTVectorStoreIndex.from_documents(
            docs, service_context=self.service_context_manager.get_service_context()
        )
        index.storage_context.persist(persist_dir=self.dataset_path)
        self.storage_context = index.storage_context
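

# Minimal usage sketch. Assumptions (not confirmed by this module):
# ServiceContextManager can be constructed without arguments, and the
# Lifecycle do_* hooks may be invoked directly rather than through a
# lifecycle driver.
if __name__ == "__main__":
    service_context_manager = ServiceContextManager()
    storage_manager = LocalStorageContextManager(
        service_context_manager, dataset_path="./dataset"
    )
    storage_manager.do_init()  # loads the persisted index, or downloads + builds it
    context = storage_manager.get_storage_context()
    print(context)
    storage_manager.do_dispose()  # persists the storage context back to disk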