RE_UPLOAD-REBUILD-RESTART
Browse files- utils/remove_duplicates.py +19 -0
utils/remove_duplicates.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def unzip_fn(items: list):
    """Transpose a list of sequences into a list of lists.

    The i-th returned list holds the i-th element of every entry in
    ``items``; for example ``[(1, 'a'), (2, 'b')]`` becomes
    ``[[1, 2], ['a', 'b']]``.

    Args:
        items: List of iterables (typically equal-length tuples).

    Returns:
        list: One list per position. Empty when ``items`` is empty; since
        ``zip`` is used, the output is truncated to the shortest entry.
    """
    columns = zip(*items)
    return list(map(list, columns))
|
3 |
+
|
4 |
+
def remove_duplicates(items: list, key=lambda x: x, show_process=False, unzip=False):
    """Remove duplicates from a list of items.

    Two items are duplicates when ``key`` returns equal values for them.
    Deduplication goes through a dict keyed by ``key(item)``, so for each
    key the result keeps the position of the first occurrence but holds
    the *last* item seen with that key (dicts preserve insertion order on
    Python 3.7+).

    Args:
        items: List of items.
        key: Function mapping an item to its hashable deduplication key.
            Defaults to the item itself.
        show_process: Whether to show a tqdm progress bar while deduping.
            tqdm is imported only when this is true.
        unzip: If true, pass the deduplicated items through ``unzip_fn``
            and return its result instead of the flat list.

    Returns:
        list: The deduplicated items, or the output of ``unzip_fn`` on
        them when ``unzip`` is true.

    Raises:
        TypeError: If ``key`` returns an unhashable value.
        ImportError: If ``show_process`` is true and tqdm is not installed.
    """
    iterable = items
    if show_process:
        # Imported lazily so tqdm is only required when a bar is requested.
        import tqdm
        iterable = tqdm.tqdm(items, desc='Deduping...')

    # Later items overwrite earlier ones under the same key, while the key
    # keeps its original insertion position.
    deduped_items = list({key(item): item for item in iterable}.values())
    return unzip_fn(deduped_items) if unzip else deduped_items
|