# NOTE: the lines below are residue from the web file viewer this file was
# copied from; they are kept as comments so the module remains valid Python.
#   Spaces: Runtime error
#   File size: 7,547 Bytes
#   0392181
#   (line-number gutter 1-266 omitted)
"""
=========================================================================================
Trojan VQA
Written by Matthew Walmer
Tools for reading and writing spec files
=========================================================================================
"""
import csv
# Column names for each kind of spec file, keyed by a one-letter type code.
# 'f' rows are identified by feat_id, 'd' rows by data_id and 'm' rows by
# model_id. save_specs uses these lists as the CSV header (in this order).
SPEC_OUTLINE = {
    'f': [
        'feat_id', 'trigger', 'scale', 'patch', 'pos', 'cb', 'cg', 'cr',
        'detector', 'nb', 'f_seed', 'f_clean',
        'op_use', 'op_size', 'op_sample', 'op_res', 'op_epochs',
    ],
    'd': [
        'data_id', 'feat_id', 'f_spec_file', 'perc', 'perc_i', 'perc_q',
        'trig_word', 'target', 'd_seed', 'd_clean',
    ],
    'm': [
        'model_id', 'data_id', 'd_spec_file', 'model', 'm_seed',
    ],
}
def save_specs(file, spec_type, specs):
    """Write spec dicts to a CSV file, one row per spec.

    Args:
        file: path of the CSV file to create (overwritten if it exists).
        spec_type: key into SPEC_OUTLINE selecting the header columns.
        specs: iterable of dicts, each written as one row.

    Raises:
        AssertionError: if spec_type is not a key of SPEC_OUTLINE.
    """
    assert spec_type in SPEC_OUTLINE
    print('saving to: ' + file)
    with open(file, 'w', newline='') as out:
        table = csv.DictWriter(out, fieldnames=SPEC_OUTLINE[spec_type])
        table.writeheader()
        table.writerows(specs)
def load_specs(file, verbose=False):
    """Read a spec CSV file and return its rows as a list of dicts.

    Args:
        file: path of the CSV file; its first line is used as the header.
        verbose: when true, print the path before reading.

    Returns:
        A list with one mapping (column name -> str value) per data row.
    """
    if verbose:
        print('loading file: ' + file)
    with open(file, 'r', newline='') as src:
        return list(csv.DictReader(src))
def make_id2spec(u_specs):
    """Index specs by their id.

    The id of each spec is whatever get_id returns for it. If two specs share
    an id, the later one in u_specs wins.
    """
    return {get_id(spec): spec for spec in u_specs}
def load_specs_dict(file):
    """Load a spec file and return its rows as a dict keyed by spec id."""
    return make_id2spec(load_specs(file))
def merge_and_proc_specs(f_spec, d_spec=None, m_spec=None):
    """Merge an f spec with optional d and m specs into one flat dict.

    All values in the result are converted to str. Keys from later specs
    (d, then m) overwrite equal keys from earlier ones.

    After merging:
      - 'feat_id' becomes 'clean' when f_spec['f_clean'] == '1'
      - 'data_id' becomes 'clean' when d_spec['d_clean'] == '1'
      - 'perc_i' / 'perc_q' are replaced by the merged 'perc' value when the
        d spec sets them to 'match'

    Raises:
        AssertionError: if the d spec does not reference the f spec's
            feat_id, or the m spec does not reference the d spec's data_id.
    """
    has_d = d_spec is not None

    # check that the specs reference each other, building the merge chain
    chain = [f_spec]
    if has_d:
        assert f_spec['feat_id'] == d_spec['feat_id']
        chain.append(d_spec)
    if m_spec is not None:
        assert d_spec['data_id'] == m_spec['data_id']
        chain.append(m_spec)

    # flatten into a single dict of strings
    merged = {}
    for part in chain:
        merged.update((key, str(value)) for key, value in part.items())

    # clean flags override the ids
    if f_spec['f_clean'] == '1':
        merged['feat_id'] = 'clean'
    if has_d:
        if d_spec['d_clean'] == '1':
            merged['data_id'] = 'clean'
        # 'match' means: reuse the main perc setting
        for field in ('perc_i', 'perc_q'):
            if d_spec[field] == 'match':
                merged[field] = merged['perc']
    return merged
def get_spec_type(s):
    """Return the type code of spec s: 'm', 'd' or 'f'.

    A spec with a 'd_spec_file' key is an 'm' spec, otherwise one with an
    'f_spec_file' key is a 'd' spec, and anything else is an 'f' spec.
    """
    for marker, code in (('d_spec_file', 'm'), ('f_spec_file', 'd')):
        if marker in s:
            return code
    return 'f'
def get_id(s):
    """Return the id of spec s, read from the column matching its type.

    Uses 'model_id' when s has a 'd_spec_file' key, 'data_id' when it has an
    'f_spec_file' key, and 'feat_id' otherwise.
    """
    if 'd_spec_file' in s:
        id_key = 'model_id'
    elif 'f_spec_file' in s:
        id_key = 'data_id'
    else:
        id_key = 'feat_id'
    return s[id_key]
def get_connected(s):
    """Return (spec file, id) of the spec that s refers to.

    For a spec with 'd_spec_file' this is (d_spec_file, data_id); for one
    with 'f_spec_file' it is (f_spec_file, feat_id). A spec with neither key
    refers to nothing and yields (None, None).
    """
    for file_key, id_key in (('d_spec_file', 'data_id'), ('f_spec_file', 'feat_id')):
        if file_key in s:
            return s[file_key], s[id_key]
    return None, None
def complete_spec(u_spec, id_2_fspec=None, id_2_dspec=None):
    """Expand a single spec into a fully merged spec.

    Args:
        u_spec: an f, d or m spec (type decided by get_spec_type).
        id_2_fspec: dict feat_id -> f spec; needed for d and m specs.
        id_2_dspec: dict data_id -> d spec; needed for m specs.

    Returns:
        The result of merge_and_proc_specs on u_spec together with the
        specs it refers to.
    """
    kind = get_spec_type(u_spec)
    if kind == 'f':
        return merge_and_proc_specs(u_spec)
    if kind == 'd':
        return merge_and_proc_specs(id_2_fspec[u_spec['feat_id']], u_spec)
    # m spec: follow data_id to the d spec, then feat_id to the f spec
    parent_d = id_2_dspec[u_spec['data_id']]
    parent_f = id_2_fspec[parent_d['feat_id']]
    return merge_and_proc_specs(parent_f, parent_d, u_spec)
def parse_row_setting(rows):
    """Convert a row selection into a list of int row indices.

    Accepted forms of rows:
      - a list: returned unchanged
      - an int: returned as a one-element list
      - 'all': returned unchanged (the caller decides what 'all' means)
      - a str of comma-separated ints, e.g. '1,2,5'
      - a str of format '4-8': an inclusive range
      - a str holding a single int, e.g. '3'

    Raises:
        ValueError: if a str component cannot be parsed as an int.
    """
    if isinstance(rows, list):
        return rows
    # a bare int would otherwise fail on the substring tests below
    if isinstance(rows, int):
        return [rows]
    if rows == 'all':
        return rows
    if ',' in rows:
        return [int(r) for r in rows.split(',')]
    if '-' in rows:
        start, end = rows.split('-')
        # the end of the range is inclusive
        return list(range(int(start), int(end) + 1))
    return [int(rows)]
# load a spec file, and filter the specs based on a row or id list
def load_and_select_specs(file, rows=None, ids=None):
    """Load a spec file and return a subset of its rows.

    Args:
        file: path of the spec CSV file.
        rows: row selection, in any form parse_row_setting accepts, or
            'all'. When given, it takes precedence over ids.
        ids: a list of ids, a single id as a str, or a str of
            comma-separated ids. Only used when rows is None.

    Returns:
        The selected specs. In row mode they follow the order of rows; in
        id mode they follow the order of the file. With neither rows nor
        ids, every spec in the file is returned.

    Raises:
        SystemExit: in id mode, when the number of specs found differs from
            the number of ids requested (exit status -1).
    """
    # sys.exit rather than the site-provided exit(), which is intended for
    # interactive use and is not guaranteed to be defined
    import sys

    if rows is None and ids is None:
        rows = 'all'
    all_specs = load_specs(file)
    if rows == 'all':
        return all_specs
    if rows is not None:  # row mode
        return [all_specs[r] for r in parse_row_setting(rows)]
    # id mode
    if not isinstance(ids, list):
        ids = ids.split(',') if ',' in ids else [ids]
    specs = [s for s in all_specs if get_id(s) in ids]
    if len(specs) != len(ids):
        print('ERROR: did not find requested ids')
        print('ids requested:')
        print(ids)
        print('specs found:')
        print(specs)
        sys.exit(-1)
    return specs
def _load_connected_specs(specs):
    """Load the spec each entry of specs refers to, one file read per file."""
    ids_by_file = {}
    for s in specs:
        cfile, cid = get_connected(s)
        wanted = ids_by_file.setdefault(cfile, [])
        if cid not in wanted:
            wanted.append(cid)
    connected = []
    for cfile, wanted in ids_by_file.items():
        id2specs = load_specs_dict(cfile)
        connected.extend(id2specs[cid] for cid in wanted)
    return connected


def gather_specs(file, rows=None, ids=None, m_id_exclude=None):
    """Load a spec file of any type together with the specs it refers to.

    Selects the specified rows of file, then loads the related spec files.
    Returns the lists f_specs, d_specs and m_specs (in that order), with an
    empty list for any level that has no specs included. An empty selection
    yields three empty lists.

    Instead of specifying rows, ids can be given to look for. The rows
    setting overrides the ids setting.

    rows and ids are passed on unchanged to load_and_select_specs.
    The rows setting can be given in several ways:
      - a list of ints
      - an int as a str
      - a str of comma-separated ints
      - a str of format '4-8'
      - 'all'

    The ids setting can be given in two ways:
      - a str with a single id
      - a str with a comma-separated list of ids

    In addition, m_id_exclude can hold model_id values to exclude. This
    helps the orchestrator re-compute which jobs still need to be run.
    """
    specs = load_and_select_specs(file, rows, ids)
    if not specs:
        # nothing selected: avoid indexing into an empty list below
        return [], [], []
    # the first spec decides the type of the whole file
    spec_type = get_spec_type(specs[0])
    if spec_type == 'f':
        return specs, [], []
    if spec_type == 'd':
        return _load_connected_specs(specs), specs, []
    # m specs: drop excluded models, then walk m -> d -> f
    if m_id_exclude is None:
        m_specs = specs
    else:
        m_specs = [s for s in specs if s['model_id'] not in m_id_exclude]
    d_specs = _load_connected_specs(m_specs)
    f_specs = _load_connected_specs(d_specs)
    return f_specs, d_specs, m_specs
# gather and return completed m specs from an m spec file
def gather_full_m_specs(m_file, rows=None, ids=None):
    """Return fully merged specs for the selected rows of an m spec file.

    Args:
        m_file: path of an m spec file.
        rows: row selection, passed on to gather_specs.
        ids: id selection, passed on to gather_specs.

    Returns:
        A list with one merged spec (see complete_spec) per selected m spec.

    Raises:
        SystemExit: if gather_specs returns no m specs (exit status -1).
    """
    # sys.exit rather than the site-provided exit(), which is intended for
    # interactive use and is not guaranteed to be defined
    import sys

    f_specs, d_specs, m_specs = gather_specs(m_file, rows, ids)
    if not m_specs:
        print('ERROR: must give a model spec file')
        sys.exit(-1)
    id_2_fspec = make_id2spec(f_specs)
    id_2_dspec = make_id2spec(d_specs)
    return [complete_spec(ms, id_2_fspec, id_2_dspec) for ms in m_specs]