# Spaces: Runtime error (status banner text, apparently captured from the hosting page; not part of the test code)
import pytest | |
from typing import Any | |
from huggingface_hub import snapshot_download | |
from langchain.embeddings import HuggingFaceInstructEmbeddings | |
from langchain.vectorstores import FAISS | |
# Fetch the prebuilt index files (*.faiss / *.pkl) from the dataset repository
# into the local 'indexes/' directory. This runs when the module is imported,
# i.e. before any test in this file executes.
snapshot_download(
    repo_id='KonradSzafer/index-large-notebooks',
    repo_type='dataset',
    allow_patterns=['*.faiss', '*.pkl'],
    local_dir='indexes/',
)
@pytest.fixture(scope='module')
def embedding_model() -> HuggingFaceInstructEmbeddings:
    """Provide the instructor embedding model used to load and query the index.

    Registered as a pytest fixture so that callers requesting
    ``embedding_model`` by parameter name can be resolved. Module scope means
    the model is constructed once and shared by every test in this file
    rather than being rebuilt per test.

    Returns:
        A ``HuggingFaceInstructEmbeddings`` configured with the
        ``hkunlp/instructor-large`` model and the embed/query instructions
        for the Hugging Face documentation.
    """
    return HuggingFaceInstructEmbeddings(
        model_name='hkunlp/instructor-large',
        embed_instruction='Represent the Hugging Face library documentation',
        query_instruction='Query the most relevant piece of information from the Hugging Face documentation',
    )
@pytest.fixture(scope='module')
def index_path() -> str:
    """Provide the directory the FAISS index is loaded from.

    The value matches the ``local_dir`` passed to the module-level
    ``snapshot_download`` call, so the downloaded index files are found here.
    Registered as a pytest fixture so callers requesting ``index_path`` by
    parameter name can be resolved.

    Returns:
        The relative directory path ``'indexes/'``.
    """
    return 'indexes/'
@pytest.fixture(scope='module')
def index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str) -> FAISS:
    """Provide the FAISS vector store loaded from ``index_path``.

    Registered as a pytest fixture so tests requesting ``index`` by parameter
    name can be resolved. Module scope means the store is loaded once and
    shared across the tests in this file.

    NOTE: pytest only allows a module-scoped fixture to depend on fixtures of
    module scope or broader, so ``embedding_model`` and ``index_path`` must
    not be narrower than that.

    Args:
        embedding_model: Embedding model handed to ``FAISS.load_local``.
        index_path: Directory containing the saved index files.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    return FAISS.load_local(index_path, embedding_model)
@pytest.fixture(scope='module')
def query() -> str:
    """Provide the sample question used by the similarity-search tests.

    Registered as a pytest fixture so tests requesting ``query`` by parameter
    name can be resolved.

    Returns:
        The fixed query string ``'How to use the tokenizer?'``.
    """
    return 'How to use the tokenizer?'
def test_load_index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str):
    """Check that loading the FAISS index from ``index_path`` yields an object.

    The store is loaded directly here (rather than through a shared helper)
    so that a failure in ``FAISS.load_local`` itself is attributed to this
    test.
    """
    loaded_store = FAISS.load_local(index_path, embedding_model)
    assert loaded_store is not None, 'Failed to load index'
def test_index_page_content(index, query: str):
    """Check that the top search hit exposes its text as a string.

    Args:
        index: Vector store offering ``similarity_search(query=..., k=...)``.
        query: Question to search the index with.
    """
    query_docs = index.similarity_search(query=query, k=3)
    # Guard first: indexing an empty result would raise IndexError and hide
    # the real cause (the search found nothing) behind an unrelated traceback.
    assert query_docs, 'Similarity search returned no documents'
    assert isinstance(query_docs[0].page_content, str)
def test_index_metadata(index, query: str):
    """Check that the top search hit carries a string ``source`` in its metadata.

    Args:
        index: Vector store offering ``similarity_search(query=..., k=...)``.
        query: Question to search the index with.
    """
    query_docs = index.similarity_search(query=query, k=3)
    # Guard first: indexing an empty result would raise IndexError and hide
    # the real cause (the search found nothing) behind an unrelated traceback.
    assert query_docs, 'Similarity search returned no documents'
    # .get() turns a missing key into a failed assertion instead of a KeyError.
    source = query_docs[0].metadata.get('source')
    assert isinstance(source, str), "Top document has no string 'source' metadata"