Cradicle Explorer

mootils.jl
  1  @doc """
  2      Mootils
  3  
  4  The Mootils module is a collection of tools, macros, and helper functions designed for work in scientific computations and statistical analysis.
  5  
  6  It provides tools to manipulate and operate on numerical arrays, including:
  7  
  8  * Handling of special floating point values such as `NaN` and `Inf`. 
  9    Functions for replacing these values or filtering them out are provided by macros such as @swapnan, @swapinf, @filtnan, @fltinf.
 10  
 11  * Creation of arrays with a specific type and shape using the @newarr macro.
 12  
 13  * Conditional execution of a code block with the @enable macro.
 14  
 15  * Traversal of arrays replacing non-finite values (@passinf, @passnan macros).
 16  
 17  * Conversion of an array to one of type Float64 (@arrfloat macro).
 18  
 19  * Transforming infinite values to specific numbers (@numinf macro).
 20  
 21  * Filtering and sorting of arrays against a threshold (`filsoc` function).
 22  
 23  * Functions for creating lagged views of arrays (lagged function).
 24  
 25  Furthermore, this module extends the `|>` operator so that 2- and 3-element `Float64` tuples are splatted as the arguments of the piped function, and adds utilities for functional programming with arrays.
 26  
 27  The module uses the StatsBase and IterTools packages.
 28  """
 29  module Mootils
 30  using IterTools
 31  using StatsBase
 32  using ..DocStringExtensions
 33  import Base.|>
 34  |>(xs::Tuple{Float64,Float64}, f) = f(xs...)
 35  |>(xs::Tuple{Float64,Float64,Float64}, f) = f(xs...)
 36  
 37  export @swapnan, @swapinf
 38  export filsoc, @fltinf, @filtnan
 39  export @newarr, @arrfloat
 40  export @unit_range!, unit_range
 41  export @skipnan, @passnan
 42  
 43  @doc """
 44  Create a new array with specified dimensions and element type.
 45  
 46  $(TYPEDSIGNATURES)
 47  
 48  This macro creates a new array with the given dimensions and element type. The array is initialized with undefined values.
 49  
 50  """
 51  macro newarr(dims, type=Float64)
 52      quote
 53          Array{$(esc(type))}(undef, $(esc(dims)))
 54      end
 55  end
 56  
 57  @doc """Defines a macro that conditionally enables a block of code.
 58  
 59  $(TYPEDSIGNATURES)
 60  
 61  If the condition `cond` is true, the macro evaluates `body...`, otherwise it does nothing.
 62  
 63  """
 64  macro enable(cond, body...)
 65      if cond
 66          quote
 67              $body...
 68          end
 69      else
 70          nothing
 71      end
 72  end
 73  
 74  
 75  @doc """ Replaces infinite values in an array with a specified value.
 76  
 77  $(TYPEDSIGNATURES)
 78  
 79  The `passinf` macro traverses an array and replaces any infinite values it encounters with a specified value.
 80  The default replacement value is 1.0.
 81  It uses the `imap` function from the IterTools package to achieve this.
 82  """
 83  macro passinf(arr, val=1.0)
 84      s_arr = esc(arr)
 85      s_val = esc(val)
 86      quote
 87          imap((el) -> isfinite(el) ? el : $s_val, $s_arr)
 88      end
 89  end
 90  
 91  @doc """Defines a macro that replaces NaN values in an array with a specified value.
 92  
 93  $(TYPEDSIGNATURES)
 94  
 95  If the element `el` in the array `arr` is NaN, it is replaced with the value `val`. The default value is 0.0.
 96  
 97  """
 98  macro passnan(arr, val=0.0)
 99      s_val = esc(val)
100      s_arr = esc(arr)
101      quote
102          imap((el) -> isnan(el) ? $s_val : el, $s_arr)
103      end
104  end
105  
@doc """ Converts an array to Float64 type conditionally

$(TYPEDSIGNATURES)

The `arrfloat` macro takes an array expression and a flag expression (default `true`).
The array expression is evaluated once. If the flag compares equal to `true`, a new `Array{Float64,N}` is built from the array, where `N` is its number of dimensions.
Otherwise, the original array is returned unchanged.

Both expressions are evaluated in the caller's scope, so the flag may be a literal or any variable or expression visible at the call site.

"""
macro arrfloat(arr, yes=true)
    quote
        # Bind once so the array expression is not evaluated twice.
        data = $(esc(arr))
        # The flag is escaped so a caller-side variable resolves in the caller's scope.
        if $(esc(yes)) == true
            Array{Float64,ndims(data)}(data)
        else
            data
        end
    end
end
125  
@doc """ Transforms infinite values to specific numbers

$(TYPEDSIGNATURES)

The `numinf` macro takes a value and two optional replacement expressions.
If the value compares equal to `Inf`, the result is `infv` (default 1.0).
If the value compares equal to `-Inf`, the result is `nanv` (default 0.0); despite its name, this parameter is the replacement for negative infinity.
Otherwise, the original value is returned.

The value expression is evaluated exactly once. Both infinities are detected with `==`, so infinities of other floating point types (e.g. `Inf32`) are replaced too. All expressions are evaluated in the caller's scope.

"""
macro numinf(val, infv=1.0, nanv=0.0)
    quote
        # Evaluate once: the value expression may be costly or have side effects.
        x = $(esc(val))
        if x == Inf
            $(esc(infv))
        elseif x == -Inf
            $(esc(nanv))
        else
            x
        end
    end
end
150  
@doc """ Replaces infinite and NaN values in an array with specified values

$(TYPEDSIGNATURES)

The `swapinf` macro takes an array and three optional parameters: a conversion flag `conv` (default `false`), a replacement for NaN `nanv` (default 0) and a magnitude for infinities `infv` (default 1).
It maps over the array: finite elements are kept, NaN elements become `nanv`, and infinite elements become `sign(el) * infv`, so `-Inf` maps to the negated magnitude.
If the flag compares equal to `true`, the mapped result is additionally converted to an `Array{Float64,N}`.

All expressions are evaluated in the caller's scope, so the flag may be a literal or a variable visible at the call site.

"""
macro swapinf(arr, conv=false, nanv=0, infv=1)
    nan_repl = esc(nanv)
    inf_repl = esc(infv)
    quote
        swapped = map(
            el -> isfinite(el) ? el : (isnan(el) ? $nan_repl : sign(el) * $inf_repl),
            $(esc(arr)),
        )
        # Conversion is done inline so the escaped flag resolves in the caller's scope.
        if $(esc(conv)) == true
            Array{Float64,ndims(swapped)}(swapped)
        else
            swapped
        end
    end
end
171  
@doc """ Replaces NaN values in an array with a specified value

$(TYPEDSIGNATURES)

The `swapnan` macro takes an array and a replacement value.
It expands to a `map` over the array that returns `val` for every element for which `isnan` holds and the element itself otherwise. Both expressions are evaluated in the caller's scope.

"""
macro swapnan(arr, val)
    fill_expr = esc(val)
    data_expr = esc(arr)
    return :(map(x -> isnan(x) ? $fill_expr : x, $data_expr))
end
185  
@doc """ Filters out NaN values from an array

$(TYPEDSIGNATURES)

The `filtnan` macro takes an array expression, evaluated in the caller's scope.
It expands to a `filter` call that keeps only the elements for which `isnan` is false and returns the filtered collection.

"""
macro filtnan(arr)
    source = esc(arr)
    return :(filter(x -> !isnan(x), $source))
end
199  
@doc """ Applies a function to an array, skipping NaN values

$(TYPEDSIGNATURES)

The `skipnan` macro takes a function, an array, and an optional dimension.
Without `dims` (or with a literal `nothing`), it expands to `f(filter(!isnan, arr))`.
With `dims`, it expands to a `mapslices` call over `arr` along `dims`, applying `f` to each slice after its NaN values have been filtered out.

`f`, `arr` and `dims` are all evaluated in the caller's scope, so they may be local variables at the call site.

"""
macro skipnan(f, arr, dims=nothing)
    fn = esc(f)
    data = esc(arr)
    # A `nothing` written at the call site reaches the macro as the symbol `:nothing`.
    if dims === nothing || dims === :nothing
        return :($fn(filter(!isnan, $data)))
    end
    slice_dims = esc(dims)
    return quote
        mapslices(x -> $fn(filter(!isnan, x)), $data; dims=$slice_dims)
    end
end
221  
@doc """ Filters out non-finite values from an array

$(TYPEDSIGNATURES)

The `fltinf` macro takes an array expression, evaluated in the caller's scope.
It expands to a `filter` call that keeps only the elements for which `isfinite` holds, so `NaN` values are dropped along with infinities.

"""
macro fltinf(arr)
    source = esc(arr)
    return :(filter(x -> isfinite(x), $source))
end
235  
@doc """ Applies a function to an array, skipping values based on a predicate

$(TYPEDSIGNATURES)

The `_maparr` macro takes a function, an array, a dimension, and an optional predicate (default `!isnan`).
Only the values that satisfy the predicate are kept before the function is applied.
If `dims` is `nothing`, it expands to `f(filter(pred, arr))`. Otherwise it expands to a `mapslices` call over `arr` along `dims`, applying `f` to each filtered slice.

`f`, `arr`, `dims` and `pred` are all evaluated in the caller's scope.

"""
macro _maparr(f, arr, dims, pred=!isnan)
    fn = esc(f)
    keep = esc(pred)
    data = esc(arr)
    # A `nothing` written at the call site reaches the macro as the symbol `:nothing`,
    # so both spellings select the whole-array branch.
    if dims === nothing || dims === :nothing
        return :($fn(filter($keep, $data)))
    end
    slice_dims = esc(dims)
    return quote
        mapslices(x -> $fn(filter($keep, x)), $data; dims=$slice_dims)
    end
end
256  
@doc """ Transforms an array to a unit range.

$(TYPEDSIGNATURES)

The `unit_range` function fits a `UnitRangeTransform` (from the `StatsBase` package) to `arr` and applies that transform to the same array with `StatsBase.transform`.
The transformed result is returned.

"""
function unit_range(arr)
    scaler = fit(UnitRangeTransform, arr)
    return StatsBase.transform(scaler, arr)
end
268  
@doc """ Transforms an array to a unit range in-place.

$(TYPEDSIGNATURES)

The `unit_range!` macro takes an array and a flag (default `true`). If the flag is enabled, it fits a `UnitRangeTransform` from the `StatsBase` package to the array and applies it with `StatsBase.transform!`; the expansion evaluates to the result of that call. If the flag is disabled, the expansion evaluates to `nothing`.

A literal `true` or `false` flag is resolved at expansion time. Any other flag expression is evaluated in the caller's scope at run time and compared with `true`. The expansion contains no `return`, so using the macro inside a function does not exit that function.

"""
macro unit_range!(arr, yes=true)
    apply = quote
        data = $(esc(arr))
        StatsBase.transform!(fit(UnitRangeTransform, data), data)
    end
    if yes isa Bool
        # Literal flag: a disabled call expands to `nothing`.
        return yes ? apply : nothing
    end
    # Non-literal flag: defer the decision to run time in the caller's scope.
    return quote
        if $(esc(yes)) == true
            $apply
        end
    end
end
284  
@doc """ Filters, sorts, and optionally concatenates an array based on a value.

$(TYPEDSIGNATURES)

The `filsoc` function takes a vector (or single-column array) `arr`, a threshold `pct`, a companion array `match` (or `nothing`), and two boolean keywords.

Elements of `arr` strictly greater than `pct` are kept (strictly less than `pct` when `inv` is `true`) and sorted in ascending order. The result is returned as a single-column matrix. If `concat` is `true` and `match` is not `nothing`, the rows of `match` at the same positions, in the same sorted order, are appended as extra columns with `hcat`.

Throws an `ArgumentError` if `arr` has more than one column.

"""
function filsoc(arr, pct, match; inv::Bool=false, concat::Bool=true)
    length(arr) == size(arr, 1) ||
        throw(ArgumentError("`arr` must be a vector or a single-column array"))
    # Sort on a flat view of the values: `sortperm` needs a vector, not an N×1 matrix.
    sortkey = vec(arr)
    kept = findall(inv ? sortkey .< pct : sortkey .> pct)
    # Row indices into the original data, ordered by ascending value.
    rows = kept[sortperm(sortkey[kept])]

    sorted = arr[rows, :]
    if concat && !isnothing(match)
        sorted = hcat(sorted, match[rows, :])
    end
    return sorted
end
302  
@doc """ Unzips a collection of tuples into separate arrays.

$(TYPEDSIGNATURES)

The `unzip` function looks up the field names of the collection's element type and, for each field, broadcasts `getfield` over the collection. It returns one result per field, in field order, each holding the values found at that position in the input elements.

"""
function unzip(a)
    fields = fieldnames(eltype(a))
    return map(fields) do field
        getfield.(a, field)
    end
end
313  
@doc """ Returns a view of `v` with value shifted according to `n` taking the last `window` values only.

$(TYPEDSIGNATURES)

The `lagged` function returns a view of `v` that ends at index `idx - n` and starts `window - 1` positions earlier; the start is clamped to the first index of `v`, so the view may be shorter than `window`. `idx` defaults to the last index of `v` and `n` defaults to 1. `n` must be positive; this is checked with an assertion.

"""
function lagged(v, window; idx=lastindex(v), n=1)
    @assert n > 0
    stop = idx - n
    # Never reach before the first element of `v`.
    start = max(firstindex(v), stop - window + 1)
    return view(v, start:stop)
end
325  
326  end