/ 3dmaskave_grp
3dmaskave_grp
  1  #!/usr/bin/env bash
  2  set -euo pipefail
  3  #
  4  # 0.for a given mask
  5  # 1.iterate over subject image volumes, extracting roi average values
  6  # 2.saves to ld8+roi+mask single row comma seprated value
  7  #     "${maskname},${subj},${roi},${val}"
  8  #
  9  # 20221108WF - extracted from various 07* scripts
 10  #
 11  
 12  ID_PATT='\d{5}_\d{8}'
 13  IN_PATT='s:.*/|.nii$|.nii.gz$|.HEAD$::g'
 14  NJOBS=1 DATATABLE="" DATACOL=""
 15  
 16  # remove common neuroimaging file extensions
 17  # and also strip out any usafe characters (eg. <1> range selectors)
 18  niibasename(){ perl -pe 's:.*/|\.nii.gz|\.nii|\.HEAD::g;s/\W+/_/g;s/_$//;' <<< "$*"; }
 19  function niibase_test { #@test
 20     run niibasename 'path/to/ab.nii.gz<1>'
 21     echo $output >&2
 22     [[ $output == ab_1 ]]
 23  }
 24  
 25  usage(){ cat <<HEREDOC
 26  SYNOPSIS:
 27  run 3dmaskave over many files and saves an output csv:
 28    roi,subj,input,beta
 29  
 30  USAGE:
 31  $(basename $0) -csv dlpfc.csv -mask "mask.nii.gz" -- paths/to/1*_2*/inputs.nii.gz
 32  3dmaskave_grp -njobs 3 -v -datatable /tmp/dt.tsv -label_col 2,3 -m "ACC_1=mask.nii.gz"
 33  
 34  3dmaskave_grp -csv MPFC-ACC_1.csv -m "ACC=mask.nii.gz" -m "PFC=mask_many.nii.gz<3>" -- paths/to/1*_2*/inputs.nii.gz
 35  
 36    -m|-mask NAME=MASK    input mask optionally with afni selector
 37    -csv  FILE            save csv as FILE (defaults to NAME.csv)
 38    -datatable FILE       use datatable as input. req Subj is first and InputFile last col
 39    -label_col IDX        req. for datatable. what col index to use for labels
 40                          csv for multiple: -label_col "3,4"
 41    -redo                 rerun, overwritting old
 42                            defaults to only writting new lines if not already in csv
 43    -pattern PAT          use PAT to extract id (default: '$ID_PATT')
 44    -inputext PAT         perl 's///' to extract input name (default: '$IN_PATT')
 45    -njobs   CORES        run with NCORES (>1 requires gnu parallel)
 46    -v|-verbose           print when not writting b/c already exists in csv
 47    -h|-usage             this text
 48  
 49    -- FILE1 FILE2 ...  input files listed after '--', probably use a glob
 50  
 51  NOTE:
 52   you may want 3dROIstats instead. It can do more and quicker! but
 53      you'll have to rerun for any new addition (mask value or inputfile)
 54      you can only use one mask file. but that mask file can have n values (for n rois)
 55      the results are wider and will need more post processsing
 56  
 57  
 58   # for inputs and a mask like
 59   IN=(/Volumes/Hera/Projects/7TBrainMech/subjs/101*/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz)
 60   mask='/Volumes/Hera/Projects/Maria/7Trest_mrsi/mvm/ACC_p2-16_cl40.nii.gz<1>'
 61  
 62   # compare this tool
 63   3dmaskave_grp -csv MPFC-ACC_1.csv -mask "ACC_1=\$mask" -- "\${IN[@]}"
 64   # vs
 65   3dROIstats -mask "\$mask" "\${IN[@]}"
 66  
 67   # and the perl extraction version
 68   3dROIstats -mask "\$mask" "\${IN[@]}" |
 69     perl -slane '/\\d{5}_\\d{8}/ || next; print "\$mask,$&,",\$F[0]=~s:.*/|.nii.gz$::gr,",\$F[2]"' -- -mask=ACC_1
 70  HEREDOC
 71  }
 72  parse_args(){
 73     MASK=()
 74     REDO=0
 75     CFILES=()
 76     while [ $# -gt 0 ]; do
 77       case "$1" in
 78          --) shift; CFILES=("$@"); break;;
 79          -m|-mask) MASK+=("$2"); shift 2;;
 80          -csv) OUTNAME="$2"; shift 2;; # def to basename of mask.csv
 81          -pattern) ID_PATT="$2"; shift 2;;
 82          -inputext) IN_PATT="$2"; shift 2;;
 83          -redo) REDO=1; shift;;
 84          -h|-usage) usage; exit 0;;
 85          -v|-verbose) VERBOSE=1; shift;;
 86          -datatable) DATATABLE="$2"; shift 2;;
 87          -label_col) DATACOL="$2"; shift 2;;
 88          -njobs) NJOBS="$2"; shift 2;;
 89          *) warn "UNKNOWN ARG: '$1'"; usage; exit 1;;
 90       esac
 91     done
 92     [ -z "${MASK[*]}" ] && warn "ERROR: need -mask 'mask.nii.gz' to be specified" && return 1
 93     [ -z "$DATATABLE" -a -z "${CFILES[*]}" ] && warn "ERROR: must specify subject/to-mask input files! use -datatable file.txt  or  -- file1.nii.gz file2.nii.gz ..." && return 2
 94     # only name out same as mask if only one mask
 95     [ -z "${OUTNAME:-}" -a ${#MASK[@]} -eq 1 ] && OUTNAME="$(niibasename "${MASK[0]}").csv"
 96     [ -z "${OUTNAME:-}" ] && warn "ERROR: need -csv OUTNAME" && return 3
 97     return 0
 98  }
 99  
100  check_mask() {
101     mask="$1"
102     [[ $mask =~ ^3dcalc ]] && warn "WARNING: inline 3dcalc will be run for each input file!" && return 0
103     maskval=$(3dmaskave -q -mask "$mask" "$mask" 2>/dev/null|| echo 0)
104     [[ $maskval == 0 ]] && warn "ERROR:BAD MASK: '$mask'" && return 1
105     return 0
106  }
107  
108  name_in_file() { perl -pe "$IN_PATT" <<< "$*"; }
109  id_in_file() { grep -m1 -oP "$ID_PATT" <<< "$*" | sed 1q; }
110  
111  name_patt_test() { #@test
112     run name_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz
113     [[ $output == 08-MPFC_deconreml-r ]]
114  }
115  id_patt_test() { #@test
116     run id_in_file /Volumes/Hera/Projects/7TBrainMech/subjs/10129_20180917/conn_mrsi_rest/mxcovsph/08-MPFC_deconreml-r.nii.gz
117     [[ $output == 10129_20180917 ]]
118  }
119  
120  maskave_NA() {
121     local mask_file="$1";shift
122     local cfile="$1";shift
123     cmd="3dmaskave -quiet -mask '$mask_file' $cfile"
124     [ -n "${VERBOSE:-}" ] && warn "$cmd"
125     eval "$cmd" || echo NA
126  }
127  already_exists() {
128     if [[ $REDO -eq 0 ]] && grep -q "$*" "$OUTNAME"; then
129        [ -n "${VERBOSE:-}" ] && warn "# '$*' already in $OUTNAME"
130        return 0
131     else
132        return 1
133     fi
134  }
135  maskave_filename_info() {
136    local input_name="$(name_in_file "$3")"
137    local id="$(id_in_file "$3")"
138    [ -z "$input_name" ] && warn "# no subj id in '$3' matching '$IN_PATT'" && return 0
139    [ -z "$id" ] && warn "# no subj id in '$3' matching '$ID_PATT'" && return 0
140    maskave_csv "$@" "$input_name" "$id"
141  }
142  
143  maskave_csv() {
144     mask_name="$1"; shift
145     mask_file="$1"; shift
146     cfile="$1"; shift
147     input_name="$1"; shift
148     id="$1"; shift
149  
150     already_exists "$mask_name,$id,$input_name," && return 0
151  
152     val="$(maskave_NA "$mask_file" "$cfile")"
153     echo "${mask_name},${id},${input_name},${val}"
154  }
155  
156  split_name(){
157     local name mask name_mask="$1"
158     name=${name_mask/=*/}
159     mask=${name_mask#*?=}
160     [[ $name == $mask ]] &&
161        name="$(niibasename "$mask")"
162     echo "$name" "$mask"
163  }
164  split_name_test() { #@test
165     read -r name mask <<< $(split_name "ACC=my/mask.nii.gz<1>")
166     [[ $name == ACC ]]
167     [[ $mask == "my/mask.nii.gz<1>" ]]
168  
169     read -r name mask <<< $(split_name "ACC=my/mask_gm=1.nii.gz<1>")
170     [[ $name == ACC ]]
171     [[ $mask == "my/mask_gm=1.nii.gz<1>" ]]
172  
173     read -r name mask <<< $(split_name "my/mask_gm1.nii.gz<1>")
174     [[ $name == mask_gm1_1 ]]
175     [[ $mask == "my/mask_gm1.nii.gz<1>" ]]
176  }
177  
178  cnt_uniq_match(){
179    cnt=$(printf '%s\n' "$@"| sort -u |wc -l)
180    [ "$cnt" -eq $# ]
181  }
182  cnt_test() { #@test
183     cnt_uniq_match a b c d
184     ! cnt_uniq_match b a d b
185  }
186  
187  check_all_masks(){
188    # double pass. first make sure everything checksout
189    # before running potentially a whole lot of 3dmaskavs
190    all_names=()
191    all_masks=()
192    local name mask name_mask
193    for name_mask in "$@"; do
194       read -r name mask <<< "$(split_name "$name_mask")"
195       #warn "# checking '$name_mask': '$name' = '$mask'"
196       all_names+=("$name")
197       all_masks+=("$mask")
198       check_mask "$mask" # mask has non-zero voxels
199    done
200  
201    # did we uniquely name all?
202    if ! cnt_uniq_match "${all_names[@]}"; then
203       warn "#ERROR: repeated names in name=mask: ${all_names[*]} "
204       return 1
205    fi
206  }
207  
208  maskave_grp_files(){
209    local name="$1"; shift
210    local mask="$1"; shift
211    local input_files=("$@");
212    warn "# extracting '$name' ($mask) mean from ${#input_files[@]} files"
213    if [ "$NJOBS" -gt 1 ]; then
214       parallel --jobs "$NJOBS" -q \
215          maskave_filename_info "$name" "$mask" ::: "${input_files[@]}" >> "$OUTNAME"
216    else
217       for cfile in "${input_files[@]}"; do
218          maskave_filename_info "$name" "$mask" "$cfile"  >> "$OUTNAME"
219       done
220    fi
221  }
222  maskave_grp_datatable(){
223     local name mask table cols
224     read -r name mask table cols <<< "$@"
225     mapfile -t ids < <(cut -f 1 "$table"|sed 1d)
226     mapfile -t labels < <(cut -f "$cols" "$table"| sed '1d;s/\s\+/_/g')
227     lastcol=$(awk '{print NF; exit}' "$table")
228     mapfile -t files < <(cut -f "$lastcol" "$table"|sed 1d)
229  
230     # NB. this might hit max number of input args for large datatable files
231     parallel --jobs "$NJOBS" -q \
232       maskave_csv "$name" "$mask" \
233         :::+ "${files[@]}" \
234         :::+ "${labels[@]}" \
235         :::+ "${ids[@]}" \
236          >> "$OUTNAME"
237  }
238  
239  check_datatable(){
240      local file="$1"; shift
241      local data_col="$1"; shift
242      ! test -r "$file" && warn "ERROR: cannot read $file!" && return 5
243  
244      if ! sed 1q "$file" |grep -q '^Subj.*InputFile$' ; then
245          warn "ERROR: '$file' must start with Subj and end with InputFile"
246          return 5
247      fi
248  
249      if grep -qm1 ' ' "$file"; then
250          warn "ERROR: '$file' includes spaces! must be tab delimited for this script"
251          return 5
252      fi
253  
254      if [ -z "$data_col" ]; then
255          warn "ERROR: must specify -label_col when using -datafile"
256          return 6
257      fi
258  
259      fcols=$(awk '{print NF; exit}' "$file")
260      maxcol=$(tr ',' '\n' <<< "$data_col"|sort -nr|sed 1q)
261      if [ $fcols -lt $maxcol ]; then
262          warn "ERROR: -label_col '$data_col' (max=$maxcol) when file has only $fcols columns"
263          return 6
264      fi
265  
266      return 0
267  }
268  
269  main() {
270    [ $# -eq 0 ] && usage && exit
271    parse_args "$@" || exit $?
272    warn "# checking all input masks"
273    check_all_masks "${MASK[@]}"
274  
275    if [ -n "$DATATABLE" ]; then
276        check_datatable "$DATATABLE" "$DATACOL" || exit $?
277    fi
278  
279    # don't need to check file for maskave if file is empty.
280    # run will be the same as REDO anyway
281    if [ ! -e "$OUTNAME" ] || [ "$(wc -l < "$OUTNAME" 2>/dev/null||echo 0)" -le 1 ]; then
282       echo "roi,subj,input,beta" > "$OUTNAME"
283       REDO=1
284    fi
285  
286    # export functions and pattern vars for gnu parallel
287    export -f check_all_masks cnt_test maskave_csv \
288              name_in_file id_in_file maskave_NA \
289              already_exists maskave_filename_info
290    export IN_PATT ID_PATT OUTNAME REDO VERBOSE
291  
292    for name_mask in "${MASK[@]}"; do
293      read -r name mask <<< "$(split_name "$name_mask")"
294      if [ -z "$DATATABLE" ]; then
295        maskave_grp_files "$name" "$mask" "${CFILES[@]}"
296      else
297        maskave_grp_datatable "$name" "$mask" "$DATATABLE" "$DATACOL"
298      fi
299    done
300  }
301  
302  eval "$(iffmain "main")"