Default38693
/

_________

Default38693 committed on May 25, 2023

Commit

ffc9a51

•

1 Parent(s): 84c8fa4

Upload 8 files

Files changed (8) hide show

Zatta/DL/DownList.txt ADDED Viewed

+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765632
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765633
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765634
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765635
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765636
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765637
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765638
+https://github.com/facebookresearch/xformers/suites/11183189417/artifacts/571765639

Zatta/DL/down.ps1 ADDED Viewed

+# Download every URL listed in a text file, pausing between requests.
+# Path to the text file that lists one download URL per line.
+$urlListFile = "H:\ZattaPython\DL\DownList.txt"
+# Load the URL list, ignoring blank or whitespace-only lines.
+$urls = Get-Content $urlListFile | Where-Object { $_.Trim() -ne "" }
+# WebClient.DownloadFile resolves a relative file name against the .NET process
+# directory, which can differ from the PowerShell location, so build absolute targets.
+$outputDirectory = (Get-Location -PSProvider FileSystem).ProviderPath
+# Browser-like User-Agent sent with each request (this does not affect redirect handling).
+$userAgent = "Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
+# Create the WebClient object used for all downloads.
+$webClient = New-Object System.Net.WebClient
+try {
+    # Download the file behind each URL.
+    foreach ($url in $urls) {
+        try {
+            # Set the header before every request so each download carries it.
+            $webClient.Headers["User-Agent"] = $userAgent
+            $fileName = [System.IO.Path]::GetFileName($url)
+            $targetPath = Join-Path -Path $outputDirectory -ChildPath $fileName
+            $webClient.DownloadFile($url, $targetPath)
+            Write-Host "ダウンロード成功: $url -> $fileName"
+        } catch {
+            # Report the reason as well, so failures (e.g. login required) can be diagnosed.
+            Write-Warning "ダウンロード失敗: $url ($($_.Exception.Message))"
+        }
+        # Wait 20 seconds before the next request.
+        Start-Sleep -Seconds 20
+    }
+} finally {
+    # Dispose of the WebClient even when the loop is interrupted.
+    $webClient.Dispose()
+}

Zatta/DL/あ.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ xformersの手動ダウンロードがめんどくさかったので自動化しようと思ったら、ログインしてないとできなかった。なので、ボツ

Zatta/file_compressor.py ADDED Viewed

+from pathlib import Path
+from PIL import Image
+# Raise Pillow's MAX_IMAGE_PIXELS limit to 1,000,000,000 pixels; presumably so very large
+# dataset images can be opened without tripping Pillow's decompression-bomb check.
+Image.MAX_IMAGE_PIXELS = 1000000000
+def resize_image(file_path, scale_percent):
+    with Image.open(file_path) as img:
+        width, height = img.size
+        new_width = int(width * scale_percent / 100)
+        new_height = int(height * scale_percent / 100)
+        img_resized = img.resize((new_width, new_height))
+    return img_resized
+def process_files_in_directory(directory_path, size_threshold, scale_percent):
+    dir_path = Path(directory_path)
+    for file_path in dir_path.glob('*.png'):
+        file_size = file_path.stat().st_size
+        if file_size >= size_threshold:
+            img_resized = resize_image(file_path, scale_percent)
+            img_resized.save(file_path)
+            resized_file_size = file_path.stat().st_size  # リサイズ後のファイルサイズを取得
+            print(f"Resized and saved {file_path} with new size: {resized_file_size} bytes")
+if __name__ == "__main__":
+    directory_path = r"E:\Dataset\XXXXXXX"  # ここにディレクトリへのパスを入力してください
+    size_threshold = 32 * 1024 * 1024  # 32MB
+    scale_percent = 50  # 縮小率50%
+    process_files_in_directory(directory_path, size_threshold, scale_percent)

Zatta/file_renamer.py ADDED Viewed

+import hashlib
+from pathlib import Path
+#指定ディレクトリのpngファイル名をmd5に変更するやつ。全角とかスペースの考慮がだるいときに
+def md5_hash(file_path):
+    with open(file_path, 'rb') as f:
+        file_data = f.read()
+        md5_hash = hashlib.md5(file_data).hexdigest()
+    return md5_hash
+def rename_files_in_directory(directory_path):
+    dir_path = Path(directory_path)
+    for file_path in dir_path.glob('*.png'):
+        new_file_name = md5_hash(file_path) + ".png"
+        new_file_path = file_path.parent / new_file_name
+        file_path.rename(new_file_path)
+        print(f"Renamed {file_path} to {new_file_path}")
+if __name__ == "__main__":
+    directory_path = r"E:\Dataset\XXXXXXX"  # ここにディレクトリへのパスを入力してください
+    rename_files_in_directory(directory_path)

Zatta/svg_convert.ps1 ADDED Viewed

+# Convert every SVG file in the input directory to a PNG by invoking Inkscape.
+# Directory holding the source .svg files ("inPath" looks like a placeholder to replace).
+$inputDirectory = "inPath"
+# Directory receiving the exported .png files ("outPath" looks like a placeholder to replace).
+$outputDirectory = "outPath"
+# Create the output directory when it does not exist yet.
+if (-not (Test-Path -Path $outputDirectory)) {
+    New-Item -ItemType Directory -Path $outputDirectory
+}
+# Process each *.svg file found directly in the input directory.
+Get-ChildItem -Path $inputDirectory -Filter *.svg | ForEach-Object {
+    $inputFilePath = $_.FullName
+    # The output keeps the source file's base name and gets a .png extension.
+    $outputFilePath = Join-Path -Path $outputDirectory -ChildPath ($_.BaseName + ".png")
+    # Run Inkscape from its hard-coded install path, exporting a PNG at 800 DPI.
+    & 'C:\Program Files\Inkscape\bin\inkscape.exe' `
+        --export-type="png" `
+        --export-filename="$outputFilePath" `
+        --export-dpi=800 `
+        $inputFilePath
+}

Zatta/tag_counter.py ADDED Viewed

+from pathlib import Path
+from collections import defaultdict
+# n割以上に共通してるタグをピックアップするやつ
+def count_tags(directory_path):
+    dir_path = Path(directory_path)
+    tag_count = defaultdict(int)
+    total_files = 0
+    for file_path in dir_path.glob('*.txt'):
+        total_files += 1
+        with open(file_path, 'r') as f:
+            tags = f.read().replace(" ", "").strip().split(',')
+            for tag in tags:
+                tag_count[tag] += 1
+                print(tag)
+    return tag_count, total_files
+def find_common_tags(tag_count, total_files, threshold):
+    common_tags = [tag for tag, count in tag_count.items() if count / total_files >= threshold]
+    return common_tags
+if __name__ == "__main__":
+    directory_path = r"E:\Dataset\XXXXXXXX"  # ここにディレクトリへのパスを入力してください
+    threshold = 0.8  # 8割以上のキャプションファイルに使われているタグのみ
+    tag_count, total_files = count_tags(directory_path)
+    print(tag_count)
+    print(total_files)
+    common_tags = find_common_tags(tag_count, total_files, threshold)
+    output = ", ".join(common_tags)
+    print(f"Common tags (used in {threshold * 100}% or more of the files): {output}")

Zatta/username_searcher.py ADDED Viewed

+import re
+from pathlib import Path
+# データセットのタグからユーザーネームを検知した時にアラートする
+def check_tags(directory_path):
+    dir_path = Path(directory_path)
+    username_pattern = re.compile(r'(username|user_name)', re.IGNORECASE)
+    for file_path in dir_path.glob('*.txt'):
+        with open(file_path, 'r') as f:
+            tags = f.read().replace(" ", "").strip().split(',')
+            for tag in tags:
+                if username_pattern.search(tag):
+                    print(f"Warning: File {file_path.name} contains tag: {tag}")
+if __name__ == "__main__":
+    directory_path = r"E:\Dataset\XXXXXX"  # ここにディレクトリへのパスを入力してください
+    check_tags(directory_path)