Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import json
|
3 |
import re
|
4 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
|
|
5 |
from openai import OpenAI
|
6 |
import hnswlib
|
7 |
import numpy as np
|
@@ -19,11 +20,12 @@ MAX_INPUT_TOKEN_LENGTH = 4000
|
|
19 |
EMBED_DIM = 1024
|
20 |
K = 10
|
21 |
EF = 100
|
22 |
-
SEARCH_INDEX = "search_index.bin"
|
23 |
-
EMBEDDINGS_FILE = "embeddings.npy"
|
24 |
-
DOCUMENT_DATASET = "chunked_data.parquet"
|
25 |
COSINE_THRESHOLD = 0.7
|
26 |
|
|
|
|
|
|
|
|
|
27 |
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
|
28 |
print("Running on device:", torch_device)
|
29 |
print("CPU threads:", torch.get_num_threads())
|
@@ -294,7 +296,7 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
|
|
294 |
)
|
295 |
|
296 |
|
297 |
-
search_index = create_hnsw_index(EMBEDDINGS_FILE)
|
298 |
data_df = pd.read_parquet(DOCUMENT_DATASET).reset_index()
|
299 |
with gr.Blocks(css="style.css") as demo:
|
300 |
gr.Markdown(DESCRIPTION)
|
|
|
2 |
import json
|
3 |
import re
|
4 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
5 |
+
from huggingface_hub import hf_hub_download
|
6 |
from openai import OpenAI
|
7 |
import hnswlib
|
8 |
import numpy as np
|
|
|
20 |
EMBED_DIM = 1024
|
21 |
K = 10
|
22 |
EF = 100
|
|
|
|
|
|
|
23 |
COSINE_THRESHOLD = 0.7
|
24 |
|
25 |
+
SEARCH_INDEX = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="search_index.bin", repo_type="dataset")
|
26 |
+
EMBEDDINGS_FILE = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="embeddings.npy", repo_type="dataset")
|
27 |
+
DOCUMENT_DATASET = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="chunked_data.parquet", repo_type="dataset")
|
28 |
+
|
29 |
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
|
30 |
print("Running on device:", torch_device)
|
31 |
print("CPU threads:", torch.get_num_threads())
|
|
|
296 |
)
|
297 |
|
298 |
|
299 |
+
search_index = load_hnsw_index(SEARCH_INDEX) # create_hnsw_index(EMBEDDINGS_FILE)
|
300 |
data_df = pd.read_parquet(DOCUMENT_DATASET).reset_index()
|
301 |
with gr.Blocks(css="style.css") as demo:
|
302 |
gr.Markdown(DESCRIPTION)
|