Commit
•
4d1c962
1
Parent(s):
c0c68e7
feat: added duration for run
Browse files
- .gitignore +1 -0
- src/distilabel_dataset_generator/sft.py +16 -3
.gitignore
CHANGED
@@ -160,3 +160,4 @@ cython_debug/
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
|
|
|
160 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
163 |
+
.DS_Store
|
src/distilabel_dataset_generator/sft.py
CHANGED
@@ -232,16 +232,29 @@ def generate_dataset(
|
|
232 |
)
|
233 |
num_rows = 5000
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
gr.Info(
|
236 |
-
"Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page."
|
|
|
237 |
)
|
238 |
result_queue = multiprocessing.Queue()
|
239 |
p = multiprocessing.Process(
|
240 |
target=_run_pipeline,
|
241 |
args=(result_queue, num_turns, num_rows, system_prompt),
|
242 |
)
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
245 |
distiset = result_queue.get()
|
246 |
|
247 |
if dataset_name is not None:
|
|
|
232 |
)
|
233 |
num_rows = 5000
|
234 |
|
235 |
+
if num_rows < 50:
|
236 |
+
duration = 60
|
237 |
+
elif num_rows < 250:
|
238 |
+
duration = 300
|
239 |
+
elif num_rows < 1000:
|
240 |
+
duration = 500
|
241 |
+
else:
|
242 |
+
duration = 1000
|
243 |
+
|
244 |
gr.Info(
|
245 |
+
"Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page.",
|
246 |
+
duration=duration,
|
247 |
)
|
248 |
result_queue = multiprocessing.Queue()
|
249 |
p = multiprocessing.Process(
|
250 |
target=_run_pipeline,
|
251 |
args=(result_queue, num_turns, num_rows, system_prompt),
|
252 |
)
|
253 |
+
try:
|
254 |
+
p.start()
|
255 |
+
p.join()
|
256 |
+
except Exception as e:
|
257 |
+
raise gr.Error(f"An error occurred during dataset generation: {str(e)}")
|
258 |
distiset = result_queue.get()
|
259 |
|
260 |
if dataset_name is not None:
|