Default38693 committed on
Commit
ffc9a51
1 Parent(s): 84c8fa4

Upload 8 files

Browse files
Zatta/DL/DownList.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765632
2
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765633
3
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765634
4
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765635
5
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765636
6
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765637
7
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765638
8
+ https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765639
Zatta/DL/down.ps1 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Download every URL listed in a text file, pausing between requests.
#
# NOTE: the note committed next to this script says the xformers artifact URLs
# can only be downloaded while logged in, so this unauthenticated client was
# abandoned for that purpose.

# Path of the text file that lists one download URL per line
$urlListFile = "H:\ZattaPython\DL\DownList.txt"

# Load the URL list, dropping blank lines so they are not attempted as downloads
$urls = @(Get-Content $urlListFile | Where-Object { $_.Trim() } | ForEach-Object { $_.Trim() })

# Browser-like User-Agent sent with each request (this does not configure redirects)
$userAgent = "Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"

# Create the WebClient used for all downloads
$webClient = New-Object System.Net.WebClient

try {
    for ($i = 0; $i -lt $urls.Count; $i++) {
        $url = $urls[$i]

        try {
            # Re-apply the header for every request; WebClient may not keep it
            # after a request has been issued (TODO confirm on the target runtime).
            $webClient.Headers["User-Agent"] = $userAgent

            $fileName = [System.IO.Path]::GetFileName($url)

            # .NET resolves relative paths against the process working directory,
            # which can differ from the current PowerShell location, so build an
            # absolute destination explicitly.
            $destination = Join-Path -Path (Get-Location -PSProvider FileSystem).ProviderPath -ChildPath $fileName

            $webClient.DownloadFile($url, $destination)
            Write-Host "ダウンロード成功: $url -> $fileName"
        } catch {
            # Report why the download failed instead of discarding the error
            Write-Warning "ダウンロード失敗: $url ($($_.Exception.Message))"
        }

        # Wait 20 seconds between downloads; nothing to wait for after the last one
        if ($i -lt $urls.Count - 1) {
            Start-Sleep -Seconds 20
        }
    }
} finally {
    # Always release the WebClient, even if the loop is interrupted
    $webClient.Dispose()
}
Zatta/DL/あ.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ xformersの手動ダウンロードがめんどくさかったので自動化しようと思ったら、ログインしてないとできなかった。なので、ボツ
Zatta/file_compressor.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from PIL import Image
3
+
4
+ Image.MAX_IMAGE_PIXELS = 1000000000
5
+
6
def resize_image(file_path, scale_percent):
    """Return a copy of the image at *file_path* scaled by *scale_percent*.

    Args:
        file_path: Path of the image file to open.
        scale_percent: Target size as a percentage of the original
            dimensions (50 halves both width and height).

    Returns:
        A new, resized image object; the source file is closed before
        returning.

    Raises:
        ValueError: If *scale_percent* is not positive.
    """
    if scale_percent <= 0:
        raise ValueError(f"scale_percent must be positive, got {scale_percent!r}")

    with Image.open(file_path) as img:
        width, height = img.size
        # Truncation can produce 0 for tiny images; keep at least one pixel
        # per side so the resize call always gets a valid size.
        new_width = max(1, int(width * scale_percent / 100))
        new_height = max(1, int(height * scale_percent / 100))
        return img.resize((new_width, new_height))
13
+
14
def process_files_in_directory(directory_path, size_threshold, scale_percent):
    """Shrink, in place, every large PNG directly inside a directory.

    Args:
        directory_path: Directory whose ``*.png`` files are examined
            (sub-directories are not searched).
        size_threshold: Minimum file size in bytes for a file to be resized.
        scale_percent: Percentage passed to ``resize_image`` for each
            qualifying file.

    Each qualifying file is overwritten with its resized version and the new
    on-disk size is printed.
    """
    dir_path = Path(directory_path)
    for file_path in dir_path.glob('*.png'):
        if file_path.stat().st_size < size_threshold:
            continue

        img_resized = resize_image(file_path, scale_percent)
        try:
            img_resized.save(file_path)
        finally:
            # Release the bitmap right away so the previous image is not
            # still held in memory while the next file is being resized.
            img_resized.close()

        resized_file_size = file_path.stat().st_size  # size on disk after resizing
        print(f"Resized and saved {file_path} with new size: {resized_file_size} bytes")
23
+
24
if __name__ == "__main__":
    # Settings for this run; edit them before launching the script.
    size_threshold = 32 * 1024 * 1024  # 32 MB
    scale_percent = 50  # shrink to 50% of the original dimensions
    directory_path = r"E:\Dataset\XXXXXXX"  # put the path of the target directory here

    process_files_in_directory(
        directory_path,
        size_threshold,
        scale_percent,
    )
Zatta/file_renamer.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ from pathlib import Path
3
+
4
+ #指定ディレクトリのpngファイル名をmd5に変更するやつ。全角とかスペースの考慮がだるいときに
5
+
6
def md5_hash(file_path):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in fixed-size chunks so that large files are never held
    in memory all at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character lowercase hexadecimal digest string.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()
11
+
12
def rename_files_in_directory(directory_path):
    """Rename each PNG directly inside a directory to ``<md5 of content>.png``.

    Args:
        directory_path: Directory whose ``*.png`` files are renamed
            (sub-directories are not searched).

    Files that already carry their hash name are left alone. When another
    file with the same content has already taken the target name, the file
    is skipped with a message instead of overwriting it or raising.
    """
    dir_path = Path(directory_path)
    # Snapshot the listing first: renaming while the glob iterator is still
    # running could make freshly renamed files show up again.
    for file_path in sorted(dir_path.glob('*.png')):
        new_file_path = file_path.with_name(md5_hash(file_path) + ".png")

        if new_file_path == file_path:
            continue  # already named after its hash

        # A different file occupying the target name means duplicate content;
        # Path.rename would fail on Windows and silently replace it on POSIX.
        if new_file_path.exists() and not new_file_path.samefile(file_path):
            print(f"Skipped {file_path}: {new_file_path} already exists")
            continue

        file_path.rename(new_file_path)
        print(f"Renamed {file_path} to {new_file_path}")
19
+
20
if __name__ == "__main__":
    # Put the path of the directory whose PNG files should be renamed here.
    directory_path = r"E:\Dataset\XXXXXXX"

    rename_files_in_directory(directory_path)
Zatta/svg_convert.ps1 ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Convert every *.svg file in the input directory to a PNG via Inkscape.
$inputDirectory = "inPath"
$outputDirectory = "outPath"

# Create the output directory when it does not exist yet
if (-not (Test-Path -Path $outputDirectory)) {
    New-Item -ItemType Directory -Path $outputDirectory
}

foreach ($svgFile in Get-ChildItem -Path $inputDirectory -Filter *.svg) {
    # Same base name as the source file, with a .png extension, in the output directory
    $pngPath = Join-Path -Path $outputDirectory -ChildPath ($svgFile.BaseName + ".png")

    # Inkscape command-line arguments: PNG export at 800 DPI, then the input file
    $inkscapeArgs = @(
        "--export-type=png",
        "--export-filename=$pngPath",
        "--export-dpi=800",
        $svgFile.FullName
    )

    & 'C:\Program Files\Inkscape\bin\inkscape.exe' @inkscapeArgs
}
Zatta/tag_counter.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from collections import defaultdict
3
+
4
+ # n割以上に共通してるタグをピックアップするやつ
5
+
6
def count_tags(directory_path):
    """Count, per tag, how many caption files in a directory contain it.

    Every ``*.txt`` file directly inside *directory_path* is read as a
    comma-separated tag list. All spaces are removed from the text before
    splitting, so a tag such as ``long hair`` is counted as ``longhair``.

    Args:
        directory_path: Directory containing the caption files.

    Returns:
        A ``(tag_count, total_files)`` tuple: ``tag_count`` is a
        ``defaultdict(int)`` mapping each tag to the number of files it
        appears in, and ``total_files`` is the number of files read.
    """
    dir_path = Path(directory_path)
    tag_count = defaultdict(int)
    total_files = 0

    for file_path in dir_path.glob('*.txt'):
        total_files += 1
        # Explicit encoding so the result does not depend on the OS locale;
        # "utf-8-sig" also drops a leading BOM that would otherwise stick to
        # the first tag.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            raw_tags = f.read().replace(" ", "").strip().split(',')

        # Count each tag at most once per file (the ratio is "share of
        # files"), keeping first-seen order so the output stays deterministic.
        unique_tags = dict.fromkeys(tag.strip() for tag in raw_tags)
        # Empty entries come from empty files or trailing commas.
        unique_tags.pop("", None)

        for tag in unique_tags:
            tag_count[tag] += 1

    return tag_count, total_files
20
+
21
def find_common_tags(tag_count, total_files, threshold):
    """Return the tags whose share of files is at least *threshold*.

    Args:
        tag_count: Mapping of tag to the number of files containing it.
        total_files: Total number of files that were counted.
        threshold: Minimum ratio ``count / total_files`` (e.g. ``0.8``).

    Returns:
        A list of qualifying tags in the mapping's iteration order; an empty
        list when no files were counted.
    """
    # With no files there is no meaningful ratio; avoid dividing by zero.
    if total_files <= 0:
        return []
    return [tag for tag, count in tag_count.items() if count / total_files >= threshold]
24
+
25
if __name__ == "__main__":
    # Settings for this run; edit them before launching the script.
    threshold = 0.8  # keep only tags used in at least 80% of the caption files
    directory_path = r"E:\Dataset\XXXXXXXX"  # put the path of the target directory here

    # Gather the raw statistics and show them as-is.
    tag_count, total_files = count_tags(directory_path)
    print(tag_count)
    print(total_files)

    # Report the tags that reach the threshold as one comma-separated line.
    output = ", ".join(find_common_tags(tag_count, total_files, threshold))
    print(f"Common tags (used in {threshold * 100}% or more of the files): {output}")
Zatta/username_searcher.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from pathlib import Path
3
+
4
+ # データセットのタグからユーザーネームを検知した時にアラートする
5
+
6
def check_tags(directory_path):
    """Print a warning for every caption tag that looks like a username tag.

    Every ``*.txt`` file directly inside *directory_path* is read as a
    comma-separated tag list (all spaces are removed before splitting). A tag
    is reported when it contains ``username`` or ``user_name``, ignoring
    case.

    Args:
        directory_path: Directory containing the caption files.
    """
    dir_path = Path(directory_path)
    username_pattern = re.compile(r'(username|user_name)', re.IGNORECASE)

    for file_path in dir_path.glob('*.txt'):
        # Explicit encoding so reading does not depend on the OS locale;
        # "utf-8-sig" also tolerates a leading BOM.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            tags = f.read().replace(" ", "").strip().split(',')

        for raw_tag in tags:
            # Strip stray newlines so multi-line files report clean tags.
            tag = raw_tag.strip()
            if username_pattern.search(tag):
                print(f"Warning: File {file_path.name} contains tag: {tag}")
16
+
17
if __name__ == "__main__":
    # Put the path of the directory whose caption files should be scanned here.
    directory_path = r"E:\Dataset\XXXXXX"

    check_tags(directory_path)