/ 3dmaskave_grp
3dmaskave_grp
1 #!/usr/bin/env bash 2 set -euo pipefail 3 # 4 # 0.for a given mask 5 # 1.iterate over subject image volumes, extracting roi average values 6 # 2.saves to ld8+roi+mask single row comma seprated value 7 # "${maskname},${subj},${roi},${val}" 8 # 9 # 20221108WF - extracted from various 07* scripts 10 # 11 12 ID_PATT='\d{5}_\d{8}' 13 IN_PATT='s:.*/|.nii$|.nii.gz$|.HEAD$::g' 14 NJOBS=1 DATATABLE="" DATACOL="" 15 16 # remove common neuroimaging file extensions 17 # and also strip out any usafe characters (eg. <1> range selectors) 18 niibasename(){ perl -pe 's:.*/|\.nii.gz|\.nii|\.HEAD::g;s/\W+/_/g;s/_$//;' <<< "$*"; } 19 function niibase_test { #@test 20 run niibasename 'path/to/ab.nii.gz<1>' 21 echo $output >&2 22 [[ $output == ab_1 ]] 23 } 24 25 usage(){ cat <<HEREDOC 26 SYNOPSIS: 27 run 3dmaskave over many files and saves an output csv: 28 roi,subj,input,beta 29 30 USAGE: 31 $(basename $0) -csv dlpfc.csv -mask "mask.nii.gz" -- paths/to/1*_2*/inputs.nii.gz 32 3dmaskave_grp -njobs 3 -v -datatable /tmp/dt.tsv -label_col 2,3 -m "ACC_1=mask.nii.gz" 33 34 3dmaskave_grp -csv MPFC-ACC_1.csv -m "ACC=mask.nii.gz" -m "PFC=mask_many.nii.gz<3>" -- paths/to/1*_2*/inputs.nii.gz 35 36 -m|-mask NAME=MASK input mask optionally with afni selector 37 -csv FILE save csv as FILE (defaults to NAME.csv) 38 -datatable FILE use datatable as input. req Subj is first and InputFile last col 39 -label_col IDX req. for datatable. what col index to use for labels 40 csv for multiple: -label_col "3,4" 41 -redo rerun, overwritting old 42 defaults to only writting new lines if not already in csv 43 -pattern PAT use PAT to extract id (default: '$ID_PATT') 44 -inputext PAT perl 's///' to extract input name (default: '$IN_PATT') 45 -njobs CORES run with NCORES (>1 requires gnu parallel) 46 -v|-verbose print when not writting b/c already exists in csv 47 -h|-usage this text 48 49 -- FILE1 FILE2 ... input files listed after '--', probably use a glob 50 51 NOTE: 52 you may want 3dROIstats instead. It can do more and quicker! but 53 you'll have to rerun for any new addition (mask value or inputfile) 54 you can only use one mask file. but that mask file can have n values (for n rois) 55 the results are wider and will need more post processsing 56 57 58 # for inputs and a mask like 59 IN=(/Volumes/Hera/Projects/7TBrainMech/subjs/101*/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz) 60 mask='/Volumes/Hera/Projects/Maria/7Trest_mrsi/mvm/ACC_p2-16_cl40.nii.gz<1>' 61 62 # compare this tool 63 3dmaskave_grp -csv MPFC-ACC_1.csv -mask "ACC_1=\$mask" -- "\${IN[@]}" 64 # vs 65 3dROIstats -mask "\$mask" "\${IN[@]}" 66 67 # and the perl extraction version 68 3dROIstats -mask "\$mask" "\${IN[@]}" | 69 perl -slane '/\\d{5}_\\d{8}/ || next; print "\$mask,$&,",\$F[0]=~s:.*/|.nii.gz$::gr,",\$F[2]"' -- -mask=ACC_1 70 HEREDOC 71 } 72 parse_args(){ 73 MASK=() 74 REDO=0 75 CFILES=() 76 while [ $# -gt 0 ]; do 77 case "$1" in 78 --) shift; CFILES=("$@"); break;; 79 -m|-mask) MASK+=("$2"); shift 2;; 80 -csv) OUTNAME="$2"; shift 2;; # def to basename of mask.csv 81 -pattern) ID_PATT="$2"; shift 2;; 82 -inputext) IN_PATT="$2"; shift 2;; 83 -redo) REDO=1; shift;; 84 -h|-usage) usage; exit 0;; 85 -v|-verbose) VERBOSE=1; shift;; 86 -datatable) DATATABLE="$2"; shift 2;; 87 -label_col) DATACOL="$2"; shift 2;; 88 -njobs) NJOBS="$2"; shift 2;; 89 *) warn "UNKNOWN ARG: '$1'"; usage; exit 1;; 90 esac 91 done 92 [ -z "${MASK[*]}" ] && warn "ERROR: need -mask 'mask.nii.gz' to be specified" && return 1 93 [ -z "$DATATABLE" -a -z "${CFILES[*]}" ] && warn "ERROR: must specify subject/to-mask input files! use -datatable file.txt or -- file1.nii.gz file2.nii.gz ..." && return 2 94 # only name out same as mask if only one mask 95 [ -z "${OUTNAME:-}" -a ${#MASK[@]} -eq 1 ] && OUTNAME="$(niibasename "${MASK[0]}").csv" 96 [ -z "${OUTNAME:-}" ] && warn "ERROR: need -csv OUTNAME" && return 3 97 return 0 98 } 99 100 check_mask() { 101 mask="$1" 102 [[ $mask =~ ^3dcalc ]] && warn "WARNING: inline 3dcalc will be run for each input file!" && return 0 103 maskval=$(3dmaskave -q -mask "$mask" "$mask" 2>/dev/null|| echo 0) 104 [[ $maskval == 0 ]] && warn "ERROR:BAD MASK: '$mask'" && return 1 105 return 0 106 } 107 108 name_in_file() { perl -pe "$IN_PATT" <<< "$*"; } 109 id_in_file() { grep -m1 -oP "$ID_PATT" <<< "$*" | sed 1q; } 110 111 name_patt_test() { #@test 112 run name_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz 113 [[ $output == 08-MPFC_deconreml-r ]] 114 } 115 id_patt_test() { #@test 116 run id_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz 117 [[ $output == 10129_20180917 ]] 118 } 119 120 maskave_NA() { 121 local mask_file="$1";shift 122 local cfile="$1";shift 123 cmd="3dmaskave -quiet -mask '$mask_file' $cfile" 124 [ -n "${VERBOSE:-}" ] && warn "$cmd" 125 eval "$cmd" || echo NA 126 } 127 already_exists() { 128 if [[ $REDO -eq 0 ]] && grep -q "$*" "$OUTNAME"; then 129 [ -n "${VERBOSE:-}" ] && warn "# '$*' already in $OUTNAME" 130 return 0 131 else 132 return 1 133 fi 134 } 135 maskave_filename_info() { 136 local input_name="$(name_in_file "$3")" 137 local id="$(id_in_file "$3")" 138 [ -z "$input_name" ] && warn "# no subj id in '$3' matching '$IN_PATT'" && return 0 139 [ -z "$id" ] && warn "# no subj id in '$3' matching '$ID_PATT'" && return 0 140 maskave_csv "$@" "$input_name" "$id" 141 } 142 143 maskave_csv() { 144 mask_name="$1"; shift 145 mask_file="$1"; shift 146 cfile="$1"; shift 147 input_name="$1"; shift 148 id="$1"; shift 149 150 already_exists "$mask_name,$id,$input_name," && return 0 151 152 val="$(maskave_NA "$mask_file" "$cfile")" 153 echo "${mask_name},${id},${input_name},${val}" 154 } 155 156 split_name(){ 157 local name mask name_mask="$1" 158 name=${name_mask/=*/} 159 mask=${name_mask#*?=} 160 [[ $name == $mask ]] && 161 name="$(niibasename "$mask")" 162 echo "$name" "$mask" 163 } 164 split_name_test() { #@test 165 read -r name mask <<< $(split_name "ACC=my/mask.nii.gz<1>") 166 [[ $name == ACC ]] 167 [[ $mask == "my/mask.nii.gz<1>" ]] 168 169 read -r name mask <<< $(split_name "ACC=my/mask_gm=1.nii.gz<1>") 170 [[ $name == ACC ]] 171 [[ $mask == "my/mask_gm=1.nii.gz<1>" ]] 172 173 read -r name mask <<< $(split_name "my/mask_gm1.nii.gz<1>") 174 [[ $name == mask_gm1_1 ]] 175 [[ $mask == "my/mask_gm1.nii.gz<1>" ]] 176 } 177 178 cnt_uniq_match(){ 179 cnt=$(printf '%s\n' "$@"| sort -u |wc -l) 180 [ "$cnt" -eq $# ] 181 } 182 cnt_test() { #@test 183 cnt_uniq_match a b c d 184 ! cnt_uniq_match b a d b 185 } 186 187 check_all_masks(){ 188 # double pass. first make sure everything checksout 189 # before running potentially a whole lot of 3dmaskavs 190 all_names=() 191 all_masks=() 192 local name mask name_mask 193 for name_mask in "$@"; do 194 read -r name mask <<< "$(split_name "$name_mask")" 195 #warn "# checking '$name_mask': '$name' = '$mask'" 196 all_names+=("$name") 197 all_masks+=("$mask") 198 check_mask "$mask" # mask has non-zero voxels 199 done 200 201 # did we uniquely name all? 202 if ! cnt_uniq_match "${all_names[@]}"; then 203 warn "#ERROR: repeated names in name=mask: ${all_names[*]} " 204 return 1 205 fi 206 } 207 208 maskave_grp_files(){ 209 local name="$1"; shift 210 local mask="$1"; shift 211 local input_files=("$@"); 212 warn "# extracting '$name' ($mask) mean from ${#input_files[@]} files" 213 if [ "$NJOBS" -gt 1 ]; then 214 parallel --jobs "$NJOBS" -q \ 215 maskave_filename_info "$name" "$mask" ::: "${input_files[@]}" >> "$OUTNAME" 216 else 217 for cfile in "${input_files[@]}"; do 218 maskave_filename_info "$name" "$mask" "$cfile" >> "$OUTNAME" 219 done 220 fi 221 } 222 maskave_grp_datatable(){ 223 local name mask table cols 224 read -r name mask table cols <<< "$@" 225 mapfile -t ids < <(cut -f 1 "$table"|sed 1d) 226 mapfile -t labels < <(cut -f "$cols" "$table"| sed '1d;s/\s\+/_/g') 227 lastcol=$(awk '{print NF; exit}' "$table") 228 mapfile -t files < <(cut -f "$lastcol" "$table"|sed 1d) 229 230 # NB. this might hit max number of input args for large datatable files 231 parallel --jobs "$NJOBS" -q \ 232 maskave_csv "$name" "$mask" \ 233 :::+ "${files[@]}" \ 234 :::+ "${labels[@]}" \ 235 :::+ "${ids[@]}" \ 236 >> "$OUTNAME" 237 } 238 239 check_datatable(){ 240 local file="$1"; shift 241 local data_col="$1"; shift 242 ! test -r "$file" && warn "ERROR: cannot read $file!" && return 5 243 244 if ! sed 1q "$file" |grep -q '^Subj.*InputFile$' ; then 245 warn "ERROR: '$file' must start with Subj and end with InputFile" 246 return 5 247 fi 248 249 if grep -qm1 ' ' "$file"; then 250 warn "ERROR: '$file' includes spaces! must be tab delimited for this script" 251 return 5 252 fi 253 254 if [ -z "$data_col" ]; then 255 warn "ERROR: must specify -label_col when using -datafile" 256 return 6 257 fi 258 259 fcols=$(awk '{print NF; exit}' "$file") 260 maxcol=$(tr ',' '\n' <<< "$data_col"|sort -nr|sed 1q) 261 if [ $fcols -lt $maxcol ]; then 262 warn "ERROR: -label_col '$data_col' (max=$maxcol) when file has only $fcols columns" 263 return 6 264 fi 265 266 return 0 267 } 268 269 main() { 270 [ $# -eq 0 ] && usage && exit 271 parse_args "$@" || exit $? 272 warn "# checking all input masks" 273 check_all_masks "${MASK[@]}" 274 275 if [ -n "$DATATABLE" ]; then 276 check_datatable "$DATATABLE" "$DATACOL" || exit $? 277 fi 278 279 # don't need to check file for maskave if file is empty. 280 # run will be the same as REDO anyway 281 if [ ! -e "$OUTNAME" ] || [ "$(wc -l < "$OUTNAME" 2>/dev/null||echo 0)" -le 1 ]; then 282 echo "roi,subj,input,beta" > "$OUTNAME" 283 REDO=1 284 fi 285 286 # export functions and pattern vars for gnu parallel 287 export -f check_all_masks cnt_test maskave_csv \ 288 name_in_file id_in_file maskave_NA \ 289 already_exists maskave_filename_info 290 export IN_PATT ID_PATT OUTNAME REDO VERBOSE 291 292 for name_mask in "${MASK[@]}"; do 293 read -r name mask <<< "$(split_name "$name_mask")" 294 if [ -z "$DATATABLE" ]; then 295 maskave_grp_files "$name" "$mask" "${CFILES[@]}" 296 else 297 maskave_grp_datatable "$name" "$mask" "$DATATABLE" "$DATACOL" 298 fi 299 done 300 } 301 302 eval "$(iffmain "main")"