---
# Inference-server configuration for a RAG (retrieval-augmented generation)
# pipeline: dense/sparse retrieval, optional reranking, HyDE query expansion,
# and a finetuned reader, with Kaggle file paths and W&B logging settings.
#
# NOTE(review): this file arrived with all indentation stripped (web-scrape
# artifact), which flattened every key to top level and produced duplicate
# keys. The nesting below is reconstructed from key names and original line
# order; in particular, placing `reranking`, `summary`, `hyde`, and `sparse`
# under `rag` is inferred — confirm against the consuming code.
run_name: "inference-server"

embeddings:
  model: "Snowflake/snowflake-arctic-embed-m-long"
  dim: 768
  device: "cuda:0"

indexing:
  batch_size: 16
  # Selects which entry under `indexes` is built/used.
  index_type: "hnsw"
  indexes:
    hnsw_pq:
      hnsw_m: 32
      pq_m: 16
      bits: 8
      ef_construction: 100
      ef_search: 64
    hnsw:
      hnsw_m: 32
      ef_construction: 100
      ef_search: 64

rag:
  dense_k: 10
  mode: "inference"  # validation or inference
  reranking:
    enabled: true
    reranking_k: 3
    model: "BAAI/bge-reranker-v2-m3"
    device: "cuda:0"
  summary:
    enabled: true
    # uses the same model as hyde
  hyde:
    enabled: true
    model: "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
    max_seq_length: 2048
    load_in_4bit: true
    device: "cuda:0"
    max_new_tokens: 256
  sparse:
    enabled: false
    model: "tomaarsen/span-marker-roberta-large-fewnerd-fine-super"
    device: "cpu"
    sparse_k: 5

reader:
  model: "jiviteshjn/pittsburgh-rag-qa-mistral-finetuned"
  max_seq_length: 2048
  load_in_4bit: true
  device: "cuda:0"
  batch_size: 4
  max_new_tokens: 64

files:
  index: "data/contexts__1024__snowflake-arctic-embed-m-long__hnsw__32_100_64.index"
  sparse_index: "data/contexts__1024.sparse_index"
  context: "/kaggle/input/contexts__1024__with_entities.jsonl"
  embeddings: "/kaggle/input/contexts__1024__snowflake-arctic-embed-m-long.csv"
  questions_jsonl: "/kaggle/input/questions__annotated.jsonl"
  answers_jsonl: "/kaggle/working/answers__annotated__combo-dense-10-no-sparse-rerank-4-hyde-mistral-finetuned.jsonl"
  questions_txt: "/kaggle/input/questions__test__split/questions_part_2.txt"
  answers_txt: "/kaggle/working/answers__test__testset-final-2-256-dense-10-no-sparse-rerank-3-summary-hyde-mistral-finetuned.txt"

wandb:
  enabled: false
  entity: "INSERT ENTITY"
  project: "INSERT PROJECT"  # fixed typo: was "INERT PROJECT"