/ mkmissing
mkmissing
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NOTE: the module docstring doubles as the --help text. It must be a raw
# string, otherwise '\1_\2' is read as octal escapes (control characters)
# and '\d' triggers invalid-escape warnings.
r"""
find pattern (probably subject id) in files of one step missing in the next
by matching a regular expression against expanded wildcard globs

Example:
  mkmissing -1 'bids/sub-*' -2 'proc/sub-*' # show subj in bids but not proc
  # either side can have more than one glob to use
  mkmissing -1 'bids/sub-*' -2 'proc_a/sub-*' -2 'proc_b/sub-*'

Options:
  search and replace to normalize: eg. / -> _
    -s '(\d{5})/(\d{8})' -r '\1_\2' # specific
    -s '/' -r '_'                   # general

  verbose with -v to see contents of inset and outset
    as well as their intersection in addition to -1 not in -2

  -p sets the pattern extracted from each file to compare between sets
     for BIDS vs fmriprep: -p 'sub-\d+_ses-\d+_task-[^_]*'
     for simple ld8:       -p '\d{5}_\d{8}'
"""
import re
from glob import glob


def patt_in_wildcard(patt, wildcards, searchreplace=None):
    """Expand wildcards and collect the unique pattern matches.

    patt          -- compiled regular expression; searched (not anchored)
                     against every path the globs expand to
    wildcards     -- one glob string, or a list/tuple of glob strings
    searchreplace -- optional (search, replacement) pair applied with
                     re.sub to every extracted match to normalize it

    Returns a set of the (optionally normalized) matched substrings.
    Paths that do not match the pattern are ignored.
    NB. a set removes duplicates but is unordered; sort it when a stable
    order is needed.
    """
    if not isinstance(wildcards, (list, tuple)):
        wildcards = [wildcards]

    found = (patt.search(path) for w in wildcards for path in glob(w))
    extracted = [m.group(0) for m in found if m is not None]

    if searchreplace is not None:
        search, replacement = searchreplace[0], searchreplace[1]
        extracted = [re.sub(search, replacement, x) for x in extracted]

    return set(extracted)


def __main__(argv=None):
    """Command line entry point: report what is in -1 but missing from -2.

    argv -- argument list to parse; None (the default) uses sys.argv[1:]

    Prints the missing ids (one per line, sorted) or, with -o, writes them
    to a file. The file is only written when something is missing unless
    -e is given. Exits with status 2 if only one of -s/-r is provided.
    """
    from argparse import RawTextHelpFormatter, ArgumentParser
    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('-1', '--inglob', dest='in_glob', required=True,
                        action='append',
                        help="input file wildcard")
    parser.add_argument('-2', '--outglob', dest='out_glob', required=True,
                        action='append',
                        help="step 2 file wildcard")
    parser.add_argument('-p', '--pattern', dest='patt',
                        help="pattern to match in file names",
                        default=r"\d{5}[-_/]\d{8}")
    parser.add_argument('-s', '--search', dest='search',
                        help="regexp search. pair with -r/--replace",
                        default=None)
    parser.add_argument('-r', '--replace', dest='replace',
                        help="string replacement. pair with -s/--search",
                        default=None)
    parser.add_argument('-v', '--verbose', action="store_true")
    parser.add_argument('-o', '--saveto', dest='saveto',
                        help="file to write if any differences\n" +
                        "if not specified, output is printed to terminal",
                        default=None)
    parser.add_argument('-e', '--save_empty', action='store_true',
                        help="save file even if there is no difference\n" +
                        "default is false for `make`'s timestamp compare")
    args = parser.parse_args(argv)

    p = re.compile(args.patt)

    # search and replace only makes sense as a pair.
    # parser.error exits non-zero so make/shell callers notice the misuse
    if (args.search is None) != (args.replace is None):
        parser.error("need both -s and -r!")
    search_replace = None
    if args.search is not None:
        search_replace = (re.compile(args.search), args.replace)

    # within 'in' set but missing from 'out' set
    # order matters: set([1,2,3]) - set([1,2,4]) = {3}
    inset = patt_in_wildcard(p, args.in_glob, search_replace)
    outset = patt_in_wildcard(p, args.out_glob, search_replace)
    missing = inset - outset

    # when debugging pattern, it's useful to see the matches
    if args.verbose:
        print(f"ARGS: p={p}; search_replace={search_replace}")
        print(f"INSET: {len(inset)} in {args.in_glob}")
        print(inset)
        print(f"OUTSET: {len(outset)} in {args.out_glob}")
        print(outset)
        print(f"MISSING: {len(missing)}")
        print(missing)

        # what both sides have in common; worth checking against outset
        both = inset.intersection(outset)
        print(f"BOTH: {len(both)}")
        print(both)

    # sets are unordered: sort so output is stable from run to run
    outstr = "\n".join(sorted(missing))
    # dont write to file if none provided
    # and only write empty missing to file if save_empty is set
    if args.saveto is None:
        print(outstr)
    elif args.save_empty or missing:
        with open(args.saveto, 'w') as f:
            f.write(outstr + "\n")


if __name__ == "__main__":
    __main__()