| | |
| |
|
"""
Parse a Caffe training log into tables of training and testing output.

Evolved from parse_log.sh
"""
| |
|
| | import os |
| | import re |
| | import extract_seconds |
| | import argparse |
| | import csv |
| | from collections import OrderedDict |
| |
|
| |
|
def parse_log(path_to_log):
    """Parse log file

    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows. Rows are assembled by parse_line_for_net_output, so each one
    carries 'NumIters', 'Seconds' and 'LearningRate' plus one column per net
    output name matched in the log.

    Lines seen before the first 'Iteration N' match are ignored, as are
    lines for which extract_seconds.extract_datetime_from_line raises
    ValueError.
    """

    # Raw strings keep regex escapes (\d, \S, \.) out of Python's own
    # string-escape processing; the compiled patterns are unchanged.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Parser state carried from one line to the next.
    iteration = -1  # sentinel: no 'Iteration N' line seen yet
    learning_rate = float('NaN')  # unknown until the first 'lr = ...' line
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        # NOTE(review): get_start_time receives the open file object, so the
        # loop below continues from wherever that helper leaves the file
        # position -- confirm against extract_seconds.
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_time = start_time

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                # Stored as a float, so it is written to the CSV as e.g. 20.0
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is found
                continue

            try:
                line_time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines whose timestamp cannot be parsed
                continue

            # A month that goes backwards is treated as a year rollover:
            # bump the year and re-parse the timestamp with it.
            if line_time.month < last_time.month:
                logfile_year += 1
                line_time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            last_time = line_time

            seconds = (line_time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    # The first row of each table is built before any 'lr = ...' line has
    # been seen, so its LearningRate is still NaN; patch it afterwards.
    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
| |
|
| |
|
def parse_line_for_net_output(regex_obj, row, row_dict_list,
                              line, iteration, seconds, learning_rate):
    """Fold one log line into the table that is being built

    Returns a tuple (row_dict_list, row)
    row: the row still being filled in, or None when there is no open row
        (nothing matched yet, or the row was just completed and stored)
    row_dict_list: the list that was passed in, possibly with one or more
        rows appended to it

    When regex_obj matches the line, group 2 is used as the column name and
    group 3 as its value (converted to float). A match for an iteration other
    than the open row's 'NumIters' stores the open row and starts a new one.
    """

    found = regex_obj.search(line)
    if found:
        starts_new_row = (not row) or row['NumIters'] != iteration
        if starts_new_row:
            if row:
                # The previous row was never detected as full (typically the
                # very first row, which has nothing to be compared against),
                # so store it now that a new iteration has begun.
                row_dict_list.append(row)

            row = OrderedDict()
            row['NumIters'] = iteration
            row['Seconds'] = seconds
            row['LearningRate'] = learning_rate

        # Group 1 (the output number) is intentionally not used.
        column, value = found.group(2, 3)
        row[column] = float(value)

    # A row counts as complete once it has as many columns as the first
    # stored row; completed rows are stored and the open row is cleared.
    has_reference_row = len(row_dict_list) > 0
    if row and has_reference_row and len(row_dict_list[0]) == len(row):
        row_dict_list.append(row)
        row = None

    return row_dict_list, row
| |
|
| |
|
def fix_initial_nan_learning_rate(dict_list):
    """Overwrite the first row's learning rate with the second row's

    The learning rate starts out as NaN and is only updated once an
    'lr = ...' line has been parsed, so the first row of a table can end up
    with LearningRate = NaN. When at least two rows exist, the first row's
    'LearningRate' is replaced in place by the second row's value; shorter
    lists are left untouched. Returns None.
    """

    if len(dict_list) <= 1:
        # No second row to borrow a value from
        return

    first_row, second_row = dict_list[0], dict_list[1]
    first_row['LearningRate'] = second_row['LearningRate']
| |
|
| |
|
def save_csv_files(logfile_path, output_dir, train_dict_list, test_dict_list,
                   delimiter=',', verbose=False):
    """Write the training and testing tables into output_dir

    Output names are the log file's base name plus a suffix: for an input
    log named caffe.INFO the files are caffe.INFO.train and caffe.INFO.test.
    Each table is handed to write_csv together with delimiter and verbose.
    """

    base_name = os.path.basename(logfile_path)

    # Training table first, then the testing table.
    tables = (
        ('.train', train_dict_list),
        ('.test', test_dict_list),
    )
    for suffix, rows in tables:
        target = os.path.join(output_dir, base_name + suffix)
        write_csv(target, rows, delimiter, verbose)
| |
|
| |
|
def write_csv(output_filename, dict_list, delimiter, verbose=False):
    """Write a CSV file

    output_filename: path of the file to create (overwritten if it exists)
    dict_list: list of dicts, one per table row; the keys of the first dict
        define the column names and their order
    delimiter: one-character column separator
    verbose: when true, print which file was written, or that it was skipped

    Nothing is written when dict_list is empty. csv.DictWriter raises
    ValueError if a later row contains a key that the first row lacks.
    """

    if not dict_list:
        if verbose:
            print('Not writing %s; no lines to write' % output_filename)
        return

    # The header comes from the first row.
    fieldnames = list(dict_list[0].keys())

    with open(output_filename, 'w') as f:
        # The delimiter is passed as a per-writer formatting parameter.
        # Assigning it to csv.excel.delimiter would change the shared dialect
        # class for every other user of the csv module in this process.
        dict_writer = csv.DictWriter(f, fieldnames=fieldnames,
                                     dialect=csv.excel, delimiter=delimiter)
        dict_writer.writeheader()
        dict_writer.writerows(dict_list)
    if verbose:
        # Call form with a single argument works as a statement on Python 2
        # and as a function call on Python 3.
        print('Wrote %s' % output_filename)
| |
|
| |
|
def parse_args():
    """Build the command-line parser and parse the process arguments

    Returns the argparse namespace with logfile_path and output_dir
    (positional), verbose (flag, False unless given) and delimiter
    (defaults to ',').
    """
    parser = argparse.ArgumentParser(
        description=('Parse a Caffe training log into two CSV files '
                     'containing training and testing information'))

    # Positional arguments
    parser.add_argument('logfile_path', help='Path to log file')
    parser.add_argument(
        'output_dir',
        help='Directory in which to place output CSV files')

    # Optional arguments
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Print some extra info (e.g., output filenames)')
    delimiter_help = ('Column delimiter in output files '
                      "(default: '%(default)s')")
    parser.add_argument('--delimiter', default=',', help=delimiter_help)

    return parser.parse_args()
| |
|
| |
|
def main():
    """Command-line entry point: parse the log, then write both CSV files"""
    options = parse_args()
    log_path = options.logfile_path

    train_rows, test_rows = parse_log(log_path)
    save_csv_files(
        log_path,
        options.output_dir,
        train_rows,
        test_rows,
        delimiter=options.delimiter,
        verbose=options.verbose
    )
| |
|
| |
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
| |
|