alozowski HF Staff committed on
Commit
c272faa
·
1 Parent(s): a947db0

Change provider in config.py

Browse files
Files changed (1) hide show
  1. yourbench_space/config.py +17 -8
yourbench_space/config.py CHANGED
@@ -18,12 +18,12 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
18
  "model_list": [
19
  {
20
  "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
21
- "provider": "novita",
22
  "max_concurrent_requests": 32,
23
  },
24
  {
25
  "model_name": "Qwen/Qwen2.5-72B-Instruct",
26
- "provider": "novita",
27
  "max_concurrent_requests": 32,
28
  },
29
  ],
@@ -36,26 +36,32 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
36
  },
37
  "pipeline": {
38
  "ingestion": {
 
39
  "source_documents_dir": f"{PATH}/{session_uid}/uploaded_files/",
40
  "output_dir": f"{PATH}/{session_uid}/ingested",
41
- "run": True,
42
  },
43
  "upload_ingest_to_hub": {
44
- "source_documents_dir": f"{PATH}/{session_uid}/ingested",
45
  "run": True,
 
46
  },
47
  "summarization": {
48
  "run": True,
 
 
 
49
  },
50
  "chunking": {
51
  "run": True,
52
  "chunking_configuration": {
 
 
 
 
53
  "l_min_tokens": 64,
54
- "l_max_tokens": 128,
55
  "tau_threshold": 0.8,
56
  "h_min": 2,
57
- "h_max": 5,
58
- "num_multihops_factor": 2,
59
  },
60
  },
61
  "single_shot_question_generation": {
@@ -64,7 +70,7 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
64
  "chunk_sampling": {
65
  "mode": "count",
66
  "value": 5,
67
- "random_seed": 123,
68
  },
69
  },
70
  "multi_hop_question_generation": {
@@ -79,6 +85,9 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
79
  "lighteval": {
80
  "run": True,
81
  },
 
 
 
82
  },
83
  }
84
 
 
18
  "model_list": [
19
  {
20
  "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
21
+ "provider": "nebius",
22
  "max_concurrent_requests": 32,
23
  },
24
  {
25
  "model_name": "Qwen/Qwen2.5-72B-Instruct",
26
+ "provider": "nebius",
27
  "max_concurrent_requests": 32,
28
  },
29
  ],
 
36
  },
37
  "pipeline": {
38
  "ingestion": {
39
+ "run": False,
40
  "source_documents_dir": f"{PATH}/{session_uid}/uploaded_files/",
41
  "output_dir": f"{PATH}/{session_uid}/ingested",
 
42
  },
43
  "upload_ingest_to_hub": {
 
44
  "run": True,
45
+ "source_documents_dir": f"{PATH}/{session_uid}/ingested",
46
  },
47
  "summarization": {
48
  "run": True,
49
+ "max_tokens": 16384,
50
+ "token_overlap": 64,
51
+ "encoding_name": "cl100k_base",
52
  },
53
  "chunking": {
54
  "run": True,
55
  "chunking_configuration": {
56
+ "chunking_mode": "fast_chunking",
57
+ "l_max_tokens": 256,
58
+ "token_overlap": 64,
59
+ "encoding_name": "cl100k_base",
60
  "l_min_tokens": 64,
 
61
  "tau_threshold": 0.8,
62
  "h_min": 2,
63
+ "h_max": 3,
64
+ "num_multihops_factor": 3,
65
  },
66
  },
67
  "single_shot_question_generation": {
 
70
  "chunk_sampling": {
71
  "mode": "count",
72
  "value": 5,
73
+ "random_seed": 49,
74
  },
75
  },
76
  "multi_hop_question_generation": {
 
85
  "lighteval": {
86
  "run": True,
87
  },
88
+ "citation_score_filtering": {
89
+ "run": True,
90
+ },
91
  },
92
  }
93