#!/usr/bin/env python3

# The prism-log-extract script extracts and collates info from a
# collection of PRISM log files.
# The basic usage is "prism-log-extract <files>" where <files>
# is one or more log files or directories containing log files.
# The default behaviour is to extract all known fields from all logs
# and then print the resulting table of values in CSV format.
# Run "prism-log-extract --help" for details of further options.

import os, sys, re, signal
from optparse import OptionParser

#==================================================================================================
# Global variables
#==================================================================================================

# Details of all the fields that can be extracted from logs
all_fields_details = [
    {'name': 'prog_name', 'descr': 'Name of program run', 'type': 'string'},
    {'name': 'prog_version', 'descr': 'Version of program run', 'type': 'string', 'regexp': r'Version: ([^ \n]+)'},
    {'name': 'log_dir', 'descr': 'Name of directory containing log file', 'type': 'string'},
    {'name': 'log_file', 'descr': 'Name of log file', 'type': 'string'},
    {'name': 'model_file', 'descr': 'Name of PRISM model file', 'type': 'file', 'regexp': r'Parsing model file "(.+)"...'},
    {'name': 'model_consts', 'descr': 'Constants defined for model', 'type': 'string', 'regexp': r'Model constants: (.+)'},
    {'name': 'model_type', 'descr': 'Model type', 'regexp': r'Type: *(.+)'},
    {'name': 'states', 'descr': 'Number of states in model', 'regexp': r'States: *(.+) \((.+) initial\)'},
    {'name': 'dd_nodes', 'descr': 'Number of nodes in the DD for the model', 'regexp': r'Transition matrix: *(.+) nodes'},
    {'name': 'time_constr', 'descr': 'Time to construct the model', 'regexp': r'Time for model construction: *(.+) sec'},
    {'name': 'prop_file', 'descr': 'Name of PRISM property file', 'type': 'file', 'regexp': r'Parsing properties file "(.+)"...'},
    {'name': 'prop_consts', 'descr': 'Constants defined for property', 'type': 'string', 'regexp': r'Property constants: (.+)'},
    {'name': 'iters_check', 'descr': 'Iterations for (numerical) model checking', 'regexp': r'took ([^ \n]+) iterations', 'match': 'last'},
    {'name': 'time_check', 'descr': 'Time to perform model checking', 'regexp': r'Time for model checking: *(.+) sec'},
    {'name': 'result', 'descr': 'Result of model checking', 'regexp': r'^Result.*: ([^( \n]+)'},
]

# Names of all fields
all_fields = [field['name'] for field in all_fields_details]

# Meta-fields: shorthand names that expand to several fields
meta_fields = {
    'all': all_fields,
    'prog': ['prog_name', 'prog_version'],
    'model': ['model_file', 'model_consts'],
    'prop': ['prop_file', 'prop_consts'],
    'benchmark': ['model_file', 'model_consts', 'prop_file', 'prop_consts'],
}

#==================================================================================================
# Utility functions
#==================================================================================================

# Returns a sorted list of files/directories in dir
def sorted_list_dir(dir):
    return sorted(os.listdir(dir))

#==================================================================================================
# Functions
#==================================================================================================

# Takes a list of field names, including "meta-fields" (e.g. 'model'
# is shorthand for 'model_file','model_consts') and expands the meta-fields
def expand_meta_fields(fields):
    fields_expanded = []
    for field in fields:
        fields_expanded.extend(meta_fields[field] if field in meta_fields else [field])
    return fields_expanded
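# For example, given the meta_fields table above, expand_meta_fields(['model', 'states'])
# returns ['model_file', 'model_consts', 'states'].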
# Get the details of a field
def get_field_details(field):
    return next(filter(lambda x: x['name'] == field, all_fields_details))

# Extract info from a list of files/directories
def grep_for_info(fileOrDirs, fields):
    infos = []
    for fileOrDir in fileOrDirs:
        infos += grep_for_info_file_or_dir(fileOrDir, fields)
    return infos

# Extract info from a single file/directory (recurse unless asked not to)
def grep_for_info_file_or_dir(fileOrDir, fields):
    infos = []
    if os.path.isdir(fileOrDir):
        for file in [file for file in sorted_list_dir(fileOrDir) if file not in [".", "..", ".svn"]]:
            if os.path.isdir(os.path.join(fileOrDir, file)):
                if not options.nonRec:
                    infos += grep_for_info_file_or_dir(os.path.join(fileOrDir, file), fields)
            else:
                infos += grep_for_info_file(os.path.join(fileOrDir, file), fields)
    else:
        infos += grep_for_info_file(fileOrDir, fields)
    return infos

# Extract info from a log file
def grep_for_info_file(logFile, fields):
    if options.extension and not logFile.endswith('.' + options.extension):
        return []
    info = {}
    # Initialise all fields
    for field in fields:
        info[field] = ''
    # For some fields, there is a specific way to define them
    if 'log_dir' in fields:
        info['log_dir'] = os.path.basename(os.path.dirname(logFile))
    if 'log_file' in fields:
        info['log_file'] = os.path.basename(logFile)
    # For other fields, we parse the log
    line_num = 1
    with open(logFile, 'r') as log:
        for line in log:
            # We assume the first line printed out is the tool name
            if line_num == 1:
                info['prog_name'] = line.strip()
            # For most fields, a regexp is used to grep the log
            # (keep the first match, unless the field asks for the last one)
            for field in fields:
                field_details = get_field_details(field)
                if 'regexp' in field_details and (info[field] == '' or ('match' in field_details and field_details['match'] == 'last')):
                    regexp = field_details['regexp']
                    m = re.search(regexp, line)
                    if m is not None:
                        info[field] = m.group(1)
            line_num += 1
    # Some field processing based on type
    for field in info.keys():
        field_details = get_field_details(field)
        if 'type' in field_details and field_details['type'] == 'file':
            info[field] = os.path.basename(info[field])
        if 'type' in field_details and field_details['type'] in ['string', 'file']:
            info[field] = '"' + info[field] + '"'
    return [info]
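# For instance, given the 'time_constr' regexp above, a log line such as
# "Time for model construction: 12.34 seconds." (wording inferred from the
# regexp, value hypothetical) would yield info['time_constr'] == '12.34'.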
# Print info from a log, i.e. a list of fields, comma-separated
def print_info(info, fields):
    values = []
    for field in fields:
        values.append(info[field])
    print(','.join(values))

#==================================================================================================
# Main program
#==================================================================================================

def printUsage():
    print("Usage: prism-log-extract <files> ...")

def print_fields():
    print('Fields:')
    for field in all_fields_details:
        print('*', field['name'], ':', field['descr'])
    print('\nMeta-fields:')
    for meta_field, defn in meta_fields.items():
        print('*', meta_field, ':', '<all fields>' if meta_field == 'all' else ','.join(defn))

def signal_handler(sig, frame):
    sys.exit(1)

# Parse options
signal.signal(signal.SIGINT, signal_handler)
parser = OptionParser(usage="usage: %prog [options] args")
parser.add_option("--show-fields", action="store_true", dest="showFields", default=False, help="Show all fields that can be extracted")
parser.add_option("--fields", dest="fields", metavar="X", default="", help="Fields to extract from the log (comma-separated)")
parser.add_option("--groupby", dest="groupby", metavar="X", default="", help="Group log entries by this field")
parser.add_option("--groupkey", dest="groupkey", metavar="X", default="", help="Key used for uniqueness of grouped log entries")
parser.add_option("--non-recursive", action="store_true", dest="nonRec", default=False, help="Don't recurse into directories")
parser.add_option("--extension", dest="extension", metavar="ext", default="", help="Only process files with name <name>.ext")
(options, args) = parser.parse_args()
if options.showFields:
    print_fields()
    sys.exit(0)
if len(args) < 1:
    parser.print_help()
    sys.exit(1)

# Determine fields to be extracted
if options.fields:
    fields = expand_meta_fields(options.fields.split(','))
    for field in fields:
        if field not in all_fields:
            print('Error: Unknown field "' + field + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
# Default to all fields if none specified
else:
    fields = list(all_fields)
# print('Extracting fields: ' + ','.join(fields))

# Process grouping info
group_by = None
group_key = None
if options.groupby:
    group_by = expand_meta_fields([options.groupby])[0]
    # Check group_by field is valid
    if group_by not in all_fields:
        print('Error: Unknown "group by" field "' + group_by + '" (valid fields are: ' + ', '.join(all_fields) + ')')
        sys.exit(1)
    # Use default group_key if not provided
    group_key = options.groupkey.split(',') if options.groupkey else ['benchmark']
    group_key = expand_meta_fields(group_key)
    # Check group_key fields are valid
    for key in group_key:
        if key not in all_fields:
            print('Error: Unknown "group key" field "' + key + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
        if key == group_by:
            print('Error: "group key" field "' + key + '" is already used in "group by"')
            sys.exit(1)
    # Add group by/key fields to overall list of fields to use
    fields_new = [group_by]
    fields_new.extend(group_key)
    fields_new.extend(x for x in fields if x not in fields_new)
    fields = fields_new
    # print('Group: By: ' + group_by + ', Key: ' + ','.join(group_key))

# Extract chosen fields from all files/dirs
infos = grep_for_info(args, fields)

# Group entries if requested
if group_by:
    # Get all values for group by/key
    group_by_vals = set(map(lambda x: x[group_by], infos))
    group_key_vals = sorted(set(map(lambda info: '.'.join([info[key] for key in group_key if key in info]), infos)))
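    # For example (hypothetical values), grouping by 'model_type' with the
    # default 'benchmark' key gives one row per model/property combination and
    # one column group per model type, e.g. columns 'DTMC:states', 'DTMC:result'.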
    # Modify list of fields for header
    # Key fields shown once at the start; others are repeated and prefixed with group
    fields_new = []
    fields_new += group_key
    for group_val in group_by_vals:
        group_val_trim = group_val.replace('"', '')
        fields_new.extend([group_val_trim + ':' + field for field in fields if field not in [group_by] + group_key])
    # Iterate through each key/group value and find (at most 1) matching entry
    infos_new = []
    for group_key_val in group_key_vals:
        info_new = {}
        # Get first matching entry and use it to fill the group key fields
        first_info_match = next(filter(lambda info: group_key_val == '.'.join([info[key] for key in group_key if key in info]), infos))
        info_new.update({x: first_info_match[x] for x in group_key})
        # For each group
        for group_val in group_by_vals:
            group_val_trim = group_val.replace('"', '')
            info_matches = [info for info in infos if (group_val == info[group_by] and group_key_val == '.'.join([info[key] for key in group_key if key in info]))]
            # >1 match: error
            if len(info_matches) > 1:
                print('Error: multiple entries matching ' + group_key_val + ' in group ' + group_val)
                sys.exit(1)
            # 1 match: store field values with names prefixed with group
            if len(info_matches) > 0:
                info = info_matches[0]
                info_new.update({group_val_trim + ':' + field: val for field, val in info.items() if field not in [group_by] + group_key})
            # 0 matches: store empty field values with names prefixed with group
            else:
                info_new.update({group_val_trim + ':' + field: "" for field in fields if field not in [group_by] + group_key})
        infos_new.append(info_new)
    fields = fields_new
    infos = infos_new

# Print entries (header, then rows)
print(','.join(fields))
for info in infos:
    print_info(info, fields)
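# Example invocations (hypothetical paths):
#   prism-log-extract logs/
#       extract all fields from every log under logs/ and print as CSV
#   prism-log-extract --fields=benchmark,time_check logs/
#       just the benchmark identification fields and the model checking time
#   prism-log-extract --groupby=model_type --groupkey=benchmark logs/
#       one row per benchmark, one column group per model type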