Merge branch 'main' of hf.co:/k4d3/toolkit
Browse files- .gitmodules +1 -1
- dataset-tools +1 -1
- utils/crc32.py +18 -0
- utils/remove_grandfathered.py +18 -34
.gitmodules
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
[submodule "dataset-tools"]
|
2 |
path = dataset-tools
|
3 |
-
url =
|
|
|
1 |
[submodule "dataset-tools"]
|
2 |
path = dataset-tools
|
3 |
+
url = git@github.com:ka-de/dataset-tools
|
dataset-tools
CHANGED
@@ -1 +1 @@
|
|
1 |
-
Subproject commit
|
|
|
1 |
+
Subproject commit a8c787494826e9ae2d1e386be8ce304eafb3a0e6
|
utils/crc32.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import zlib
|
2 |
+
import sys
|
3 |
+
|
4 |
+
def calculate_crc32(file_path, chunk_size=1 << 20):
    """Return the CRC32 checksum of the file at *file_path*.

    The file is opened in binary mode and fed to ``zlib.crc32`` one chunk at
    a time, carrying the running checksum forward, so memory use is bounded
    by *chunk_size* instead of by the size of the file.

    Args:
        file_path: Path of the file to checksum.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The checksum as an integer; an empty file yields 0.

    Raises:
        ValueError: If *chunk_size* is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    crc32_checksum = 0
    with open(file_path, 'rb') as file:
        while True:
            chunk = file.read(chunk_size)
            if not chunk:
                break
            # Passing the previous value continues the same checksum.
            crc32_checksum = zlib.crc32(chunk, crc32_checksum)
    return crc32_checksum
|
9 |
+
|
10 |
+
if __name__ == "__main__":
    # Exactly one positional argument (the file to checksum) is expected.
    if len(sys.argv) != 2:
        # Report the script's real invocation name, and keep stdout clean
        # for the checksum line by writing usage errors to stderr.
        print(f"Usage: python {sys.argv[0]} <file_path>", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]
    try:
        checksum = calculate_crc32(file_path)
    except OSError as err:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Error: cannot read {file_path}: {err}")

    # Zero-padded, 0x-prefixed, 8-digit hexadecimal.
    print(f"CRC32 checksum of {file_path}: {checksum:#010x}")
|
18 |
+
|
utils/remove_grandfathered.py
CHANGED
@@ -1,42 +1,26 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
# -*- coding: utf-8 -*-
|
3 |
-
#
|
4 |
-
import os
|
5 |
-
import sys
|
6 |
-
# Add the parent directory to the Python path
|
7 |
-
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
8 |
-
|
9 |
-
from utils.file_processor import FileProcessor, ProcessorOptions
|
10 |
-
import re
|
11 |
-
from pathlib import Path
|
12 |
|
13 |
-
|
14 |
-
def __init__(self, options: ProcessorOptions, pattern: str):
|
15 |
-
super().__init__(options)
|
16 |
-
self.pattern = pattern
|
17 |
-
|
18 |
-
def process_content(self, content: str) -> str:
|
19 |
-
# Remove occurrences of the grandfathered content pattern
|
20 |
-
content = re.sub(self.pattern, '', content)
|
21 |
-
# Normalize whitespace and commas
|
22 |
-
content = re.sub(r'\s+,', ',', content)
|
23 |
-
content = re.sub(r',\s+', ',', content)
|
24 |
-
return re.sub(r'\s+', ' ', content).strip()
|
25 |
|
26 |
-
def
|
27 |
-
|
|
|
28 |
|
29 |
-
|
30 |
-
recursive=True,
|
31 |
-
dry_run=False,
|
32 |
-
file_extensions={'.txt', '.tags'}
|
33 |
-
)
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
41 |
if __name__ == "__main__":
|
42 |
-
|
|
|
|
|
|
|
|
1 |
#!/usr/bin/env python
|
2 |
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
def remove_text_from_file(file_path, text_to_remove):
    """Remove every occurrence of *text_to_remove* from a text file, in place.

    The file is read and written as UTF-8 explicitly, so the result does not
    depend on the platform's default encoding. If the text does not occur in
    the file, the file is left untouched rather than being rewritten.

    Args:
        file_path: Path of the file to edit.
        text_to_remove: Literal substring to delete (no pattern matching).

    Raises:
        OSError: If the file cannot be opened, read, or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    updated = content.replace(text_to_remove, "")
    if updated == content:
        # Nothing matched: skip the write so the file is not modified.
        return

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(updated)
|
14 |
+
|
15 |
+
def remove_text_from_files(root_dir, text_to_remove, extensions=('.txt', '.tags')):
    """Strip *text_to_remove* from every matching file under *root_dir*.

    Walks *root_dir* recursively with ``os.walk`` and calls
    ``remove_text_from_file`` on each file whose name ends with one of
    *extensions*.

    Args:
        root_dir: Directory at which the recursive walk starts.
        text_to_remove: Literal substring to delete from each matching file.
        extensions: A filename suffix, or an iterable of suffixes, selecting
            which files are processed. Defaults to ``.txt`` and ``.tags``.
    """
    # A bare string would otherwise be split into single characters.
    if isinstance(extensions, str):
        suffixes = (extensions,)
    else:
        suffixes = tuple(extensions)

    for subdir, _, files in os.walk(root_dir):
        for file in files:
            # str.endswith accepts a tuple and matches any of its entries.
            if file.endswith(suffixes):
                file_path = os.path.join(subdir, file)
                remove_text_from_file(file_path, text_to_remove)
|
21 |
|
22 |
if __name__ == "__main__":
    # Local import: sys is not among the imports visible at the top of this
    # script, and it is only needed by the command-line entry point.
    import sys

    # Optional overrides: argv[1] is the root directory, argv[2] is the text
    # to remove. With no arguments the defaults below are used.
    root_directory = sys.argv[1] if len(sys.argv) > 1 else '.'
    text_to_remove = sys.argv[2] if len(sys.argv) > 2 else "grandfathered content, "
    remove_text_from_files(root_directory, text_to_remove)
|
26 |
+
|