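# Configuration for the RAG question-answering pipeline: embeddings, indexing, retrieval, and reader.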
run_name: "inference-server"
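
# Dense retriever: embedding model used to encode contexts and queries.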
embeddings:
  model: "Snowflake/snowflake-arctic-embed-m-long"
  dim: 768
  device: "cuda:0"
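
# ANN index construction: a plain HNSW index and an HNSW + product-quantization variant.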
indexing:
  batch_size: 16
  index_type: "hnsw"
  indexes:
    hnsw_pq:
      hnsw_m: 32
      pq_m: 16
      bits: 8
      ef_construction: 100
      ef_search: 64
    hnsw:
      hnsw_m: 32
      ef_construction: 100
      ef_search: 64
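
# Retrieval pipeline settings.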
rag:
  dense_k: 10
  mode: "inference"  # validation or inference
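  # Cross-encoder reranking of the dense hits down to reranking_k.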
  reranking:
    enabled: true
    reranking_k: 3
    model: "BAAI/bge-reranker-v2-m3"
    device: "cuda:0"
  summary:
    enabled: true
    # uses the same model as hyde
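  # HyDE (Hypothetical Document Embeddings): an instruct LLM drafts a hypothetical answer
  # whose embedding is used for dense retrieval.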
  hyde:
    enabled: true
    model: "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
    max_seq_length: 2048
    load_in_4bit: true
    device: "cuda:0"
    max_new_tokens: 256
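  # Sparse retrieval (disabled); the SpanMarker NER model suggests entity-based matching.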
  sparse:
    enabled: false
    model: "tomaarsen/span-marker-roberta-large-fewnerd-fine-super"
    device: "cpu"
    sparse_k: 5
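
# Reader: fine-tuned LLM that generates the final answer from the retrieved contexts.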
reader:
  model: "jiviteshjn/pittsburgh-rag-qa-mistral-finetuned"
  max_seq_length: 2048
  load_in_4bit: true
  device: "cuda:0"
  batch_size: 4
  max_new_tokens: 64
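
# Input/output paths: index artifacts, contexts, questions, and generated answers.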
files:
  index: "data/contexts__1024__snowflake-arctic-embed-m-long__hnsw__32_100_64.index"
  sparse_index: "data/contexts__1024.sparse_index"
  context: "/kaggle/input/contexts__1024__with_entities.jsonl"
  embeddings: "/kaggle/input/contexts__1024__snowflake-arctic-embed-m-long.csv"
  questions_jsonl: "/kaggle/input/questions__annotated.jsonl"
  answers_jsonl: "/kaggle/working/answers__annotated__combo-dense-10-no-sparse-rerank-4-hyde-mistral-finetuned.jsonl"
  questions_txt: "/kaggle/input/questions__test__split/questions_part_2.txt"
  answers_txt: "/kaggle/working/answers__test__testset-final-2-256-dense-10-no-sparse-rerank-3-summary-hyde-mistral-finetuned.txt"
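
# Weights & Biases logging; set entity and project to enable.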
wandb:
  enabled: false
  entity: "INSERT ENTITY"
  project: "INSERT PROJECT"