# -*- coding: utf-8 -*-
"""RAG

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/18JQq2-GCmrrwAk9UuvqqeVrrnB4raKZt
"""
# Dependencies (uncomment to install in Colab):
# !pip install -q pypdf
# !pip install torch
# !pip install -q transformers
# !pip install -q sentence-transformers
# !pip install -q llama-index
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
# !pip install llama-index-embeddings-huggingface
# !pip install llama-index-llms-llama-cpp
import torch
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Mistral 7B Instruct v0.2 supports a larger context, but 4096 tokens keeps
    # memory usage modest and leaves room for the generated output
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # n_gpu_layers=-1 offloads all layers to the GPU; set to 0 for CPU-only
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into the Mistral/Llama-2 instruct prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
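
# Optional smoke test (not in the original notebook): LlamaCPP exposes a
# complete() method for one-off generation, which is a quick way to confirm the
# model downloaded and GPU offload works before building the index.
# The prompt below is just a placeholder.
sanity_response = llm.complete("What is retrieval-augmented generation?")
print(sanity_response.text)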
from llama_index.core import Document

documents = SimpleDirectoryReader(
    input_files=["/content/Mindcase Data.pdf"]
).load_data()

# combine all pages into a single Document before sentence-window parsing
documents = Document(text="\n\n".join([doc.text for doc in documents]))
import os

from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)


def get_build_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="./vector_store/index",
):
    # split the text into single sentences and attach a window of surrounding
    # sentences as metadata, which is swapped back in at retrieval time
    node_parser = SentenceWindowNodeParser(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    if not os.path.exists(save_dir):
        # create and persist the index
        index = VectorStoreIndex.from_documents(
            [documents], service_context=sentence_context
        )
        index.storage_context.persist(persist_dir=save_dir)
    else:
        # load the existing index (requires StorageContext, imported above)
        index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )
    return index


# build (or load) the vector index
vector_index = get_build_index(
    documents=documents,
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="./vector_store/index",
)
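
# Illustrative check (not part of the original notebook): retrieve a few nodes
# directly to inspect the sentence-window metadata attached by the node parser.
# The question string is a placeholder for something covered by the PDF.
retriever = vector_index.as_retriever(similarity_top_k=2)
for node_with_score in retriever.retrieve("What does the document cover?"):
    print(node_with_score.score, node_with_score.node.metadata["window"][:200])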
from llama_index.core.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank


def get_query_engine(sentence_index, similarity_top_k=6, rerank_top_n=2):
    # replace each retrieved sentence with its surrounding window before synthesis
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    # rerank the retrieved windows and keep only the top rerank_top_n
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    engine = sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
    )
    return engine


query_engine = get_query_engine(
    sentence_index=vector_index, similarity_top_k=6, rerank_top_n=2
)
def query(user_query):
    # the query engine is not callable directly; use its .query() method
    return query_engine.query(user_query)
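
# Example usage (illustrative; the question is a placeholder for the Mindcase PDF):
response = query("Summarize the key points of the document.")
print(response)
# response.source_nodes holds the reranked sentence windows that grounded the answer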