/*
 * This script exists to merge all .info files into one array.
 * Unfortunately, civitai is storing some of them as LFS objects,
 * so the script outputs a list of errors along with scripts to fix them (delfiles.sh, replacefiles.sh).
 * Other files are blank, so just ignore the errors the second time; they're files that aren't on civitai.
 * Now we get to the important part: it parses all files and dumps all of them into an array in civitai.info.json for further processing.
 * Run once, run delfiles.bat or delfiles.sh to get rid of the bad ones, then run replacefiles.sh to fetch them from civitai.
 *
 * The reason I chose to output scripts (and use two of them) rather than doing it internally is two-fold:
 * 1. I can inspect the files delfiles.sh will delete before running it.
 * 2. I can rewrite replacefiles.sh with a script to use a download manager (aria2c) when it's large. (This script is intended to be used in colab.)
 */

// Change this to true if you want the full contents of every .info file; otherwise we delete a bunch of stuff I don't care about.
// Keep in mind this changes the format of the file (I've replaced the image object with an array of urls that have no metadata),
// so any scripts I write won't work with it anymore. This reduced the size of the file by nearly 80%... The original data is still in the original files.
// When true, every parsed .info object is stored untouched; when false, push()
// strips the fields listed below and flattens `images` to an array of URL paths.
const fullFiles = false;

const path = require('path');
const fs = require('fs');

/**
 * Recursively collects every file below `startPath` whose path ends with `filter`.
 *
 * @param {string} startPath Directory to walk.
 * @param {string} filter Suffix a file path must end with to be collected.
 * @param {string[]} [arr] Accumulator used by the recursion; created when omitted.
 * @returns {string[]} The accumulator. Always an array, even when `startPath`
 *   does not exist, so callers can spread the result unconditionally.
 */
function fromDir(startPath, filter, arr) {
  const found = arr || [];
  if (!fs.existsSync(startPath)) {
    console.log("no dir ", startPath);
    return found;
  }
  for (const entry of fs.readdirSync(startPath)) {
    const filename = path.join(startPath, entry);
    if (fs.lstatSync(filename).isDirectory()) {
      fromDir(filename, filter, found); // recurse
    } else if (filename.endsWith(filter)) {
      found.push(filename);
    }
  }
  return found;
}

// Parsed .info objects, in the order they were pushed.
const addons = [];
// Files that failed JSON parsing: listed in the delete scripts AND replacefiles.sh.
const errors = [];
// Files that parsed to nothing useful: listed in the delete scripts only.
const errors2 = [];

// Top-level keys removed from each addon when fullFiles is false.
const ADDON_KEYS_TO_DROP = 'createdAt,updatedAt,earlyAccessTimeFrame,downloadUrl,description'.split(',');
// Keys removed from each remaining entry of addon.files when fullFiles is false.
const FILE_KEYS_TO_DROP = 'sizeKB,format,pickleScanResult,pickleScanMessage,virusScanResult,scannedAt,hashes,primary,downloadUrl'.split(',');
// Entries of addon.files with one of these types are removed entirely.
const FILE_TYPES_TO_DROP = ['Training Data', 'Config'];

/**
 * Reads one .info file and records it in `addons`, `errors` or `errors2`.
 *
 * - Content that is not valid JSON goes to `errors`.
 * - JSON that is not a non-empty plain object (e.g. `{}`, `null`, `[]`) goes to `errors2`.
 * - Anything else gets a `mirror_path` property and is appended to `addons`,
 *   trimmed first unless `fullFiles` is set.
 *
 * @param {string} file Path of the .info file to read.
 * @returns {void}
 */
const push = function (file) {
  let addon;
  try {
    addon = JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch (err) {
    console.error(file + " is probably an LFS pointer. Will be added to delfiles and replacefiles scripts!");
    errors.push(file);
    return;
  }

  // `null` and other non-object JSON values cannot carry addon data; treat them
  // like an empty object instead of crashing further down.
  const isRecord = addon !== null && typeof addon === 'object' && !Array.isArray(addon);
  if (!isRecord || Object.keys(addon).length === 0) {
    console.error(file + " is an empty file! Will be added to delfiles script! Assuming it's not a mistake, it will *not* be replaced!");
    errors2.push(file);
    return;
  }

  addon.mirror_path = file; // Add the location we found the file.

  if (fullFiles) {
    addons.push(addon);
    return;
  }

  // Clean out information we don't care about. Missing sections are tolerated
  // so one unusual file does not abort the whole run.
  if (addon.model !== null && typeof addon.model === 'object') {
    delete addon.model.poi;
  }
  ADDON_KEYS_TO_DROP.forEach((key) => {
    delete addon[key];
  });

  const fileEntries = Array.isArray(addon.files) ? addon.files : [];
  addon.files = fileEntries.filter((entry) => !FILE_TYPES_TO_DROP.includes(entry.type));
  addon.files.forEach((entry) => {
    FILE_KEYS_TO_DROP.forEach((key) => {
      delete entry[key];
    });
  });

  // Replace the image objects with their URLs, minus the common host prefix.
  const imageEntries = Array.isArray(addon.images) ? addon.images : [];
  addon.images = imageEntries
    .filter((img) => img !== null && typeof img === 'object' && typeof img.url === 'string')
    .map((img) => img.url.replace('https://imagecache.civitai.com/', ''));

  addons.push(addon);
};

const MIRROR_BASE_URL = 'https://huggingface.co/anonderpling/civitai_mirror/resolve/main/';

// Converts backslash separators to forward slashes for the .sh scripts and URLs.
const toPosixPath = (p) => p.split('\\').join('/');

// Wraps `s` in double quotes, escaping the characters that stay special inside
// shell double quotes. Ordinary file names come out exactly as before.
const shellDoubleQuote = (s) => '"' + s.replace(/["$`\\]/g, '\\$&') + '"';

// Builds the mirror URL for a file, percent-encoding each path segment so that
// spaces, '#', '?' and similar characters survive the request.
const mirrorUrl = (p) => MIRROR_BASE_URL + toPosixPath(p).split('/').map(encodeURIComponent).join('/');

// Builds the wget line that re-downloads one file to its original location.
const replaceCommand = (p) => 'wget ' + shellDoubleQuote(mirrorUrl(p)) + ' -O ' + shellDoubleQuote(toPosixPath(p));

const files1 = fromDir('models', '.civitai.info');
const files2 = fromDir('embeddings', '.civitai.info');
const files = [...files1, ...files2];
files.forEach((file) => {
  push(file);
});

const errs = [...errors, ...errors2].sort();
fs.writeFileSync('civitai.info.json', JSON.stringify(addons));
fs.writeFileSync('errors.json', JSON.stringify(errs));
fs.writeFileSync('delfiles.bat', '@echo off\n\n' + errs.map((e) => 'del "' + e + '"').join('\n'));
fs.writeFileSync('delfiles.sh', errs.map((e) => 'rm ' + shellDoubleQuote(toPosixPath(e))).join('\n'));
// Only unparseable files are re-fetched; empty ones are assumed intentional.
fs.writeFileSync('replacefiles.sh', errors.map(replaceCommand).join('\n'));