Add concurrent_task_executor function for executing tasks concurrently
src/concurrent_task_executor.py
ADDED
@@ -0,0 +1,37 @@
+import concurrent.futures
+from typing import Any, Callable, List, Optional
+from tqdm import tqdm
+
+def concurrent_task_executor(task: Callable[[Any], None], data_list: List[Any], max_workers: int = 32, description: Optional[str] = None) -> None:
+    """
+    Execute tasks concurrently on a list of data objects using a ThreadPoolExecutor.
+    Args:
+        task (Callable): The function to apply to each data object.
+        data_list (List): The list of data objects.
+        max_workers (int): The maximum number of worker threads (default is 32).
+        description (str, optional): Description for the progress bar.
+    Raises:
+        ValueError: If data_list is empty.
+    Example:
+        >>> def process_data(data):
+        ...     # Process data here
+        ...     pass
+        >>> data_list = [1, 2, 3, 4, 5]
+        >>> concurrent_task_executor(process_data, data_list, max_workers=8, description="Processing data")
+    """
+
+    if not data_list:
+        raise ValueError("Data list is empty. No tasks to execute.")
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit tasks to the executor
+        futures = [executor.submit(task, data) for data in data_list]
+
+        # Create progress bar
+        with tqdm(total=len(data_list), desc=description) as pbar:
+            # Wait for all tasks to complete
+            for future in concurrent.futures.as_completed(futures):
+                pbar.update(1)  # Update progress bar
+
+    # Clear the data_list after all tasks are completed
+    data_list.clear()
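Below is a minimal usage sketch for the new helper. The process_item function, the import path, and the example values are illustrative assumptions, not part of this change. Note two behaviors of the function as written: it never calls future.result(), so exceptions raised inside task are not re-raised (failed items simply advance the progress bar), and it clears data_list in place once all tasks finish.

# Illustrative usage of concurrent_task_executor.
# The import path assumes the repository root is on sys.path; adjust to your layout.
from src.concurrent_task_executor import concurrent_task_executor

def process_item(item: int) -> None:
    # Hypothetical per-item work; replace with real (typically I/O-bound) processing.
    _ = item * item

if __name__ == "__main__":
    items = list(range(100))
    concurrent_task_executor(process_item, items, max_workers=8, description="Processing items")
    # The helper clears its input list after all tasks complete.
    assert items == []

Because ThreadPoolExecutor threads share the interpreter, this helper is best suited to I/O-bound tasks (network requests, file reads); CPU-bound work will not run in parallel under the GIL.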