/ StrategyStats / src / metrics.jl
metrics.jl
  1  using Data.DataFramesMeta
  2  using .Misc: config, DFT
  3  using Data: @to_mat, PairData
  4  using .Misc.Lang
  5  
  6  @doc """Identifies maximum and minimum points in a DataFrame.
  7  
  8  $(TYPEDSIGNATURES)
  9  
 10  The `maxmin` function takes the following parameters:
 11  
 12  - `df`: a DataFrame in which to identify maxima and minima.
 13  - `order` (optional, default is 1): an integer specifying how many points on each side of a point to use for the comparison to consider the point as a maximum or minimum. For example, if order=3, a point will be considered a maximum if it has three datapoints in either direction that are smaller than it.
 14  - `threshold` (optional, default is 0.0): a threshold value which the datapoint must exceed to be considered a maximum or minimum.
 15  - `window` (optional, default is 100): a window size to apply a moving maximum/minimum filter.
 16  
 17  The function identifies maximum and minimum points in the DataFrame `df` based on the specified `order`, `threshold`, and `window`. It then returns a DataFrame with the identified maxima and minima.
 18  """
 19  function maxmin(df; order=1, threshold=0.0, window=100)
 20      df[!, :maxima] .= NaN
 21      df[!, :minima] .= NaN
 22      dfv = @view df[(window + 2):end, :]
 23      price = df.close
 24      # prev_window = window - 2
 25      @eachrow! dfv begin
 26          stop = row + window
 27          # ensure no lookahead bias
 28          @assert df.timestamp[stop] < :timestamp
 29          subts = @view(price[row:stop])
 30          mx = maxima(subts; order, threshold)
 31          local ma = mi = NaN
 32          for (n, x) in enumerate(mx)
 33              if x
 34                  ma = n
 35                  break
 36              end
 37          end
 38          mn = minima(subts; order, threshold)
 39          for (n, x) in enumerate(mn)
 40              if x
 41                  mi = n
 42                  break
 43              end
 44          end
 45          :maxima = ma > mi
 46          :minima = mi > ma
 47      end
 48      df
 49  end
 50  
 51  @doc """Calculates the success rate of given column against the next candle.
 52  
 53  $(TYPEDSIGNATURES)
 54  
 55  The `up_successrate` function takes the following parameters:
 56  
 57  - `df`: a DataFrame that represents historical market data.
 58  - `bcol`: a Symbol or String that represents the column name in `df` to calculate the success rate against.
 59  - `threshold` (optional, default is 0.05): a threshold value which the price change must exceed to be considered a success.
 60  
 61  The function calculates the success rate of a particular strategy indicated by `bcol` for buying or selling. A trade is considered successful if the price change in the next candle exceeds the `threshold`. The direction of the trade (buy or sell) is determined by the `bcol` column: `true` for buy and `false` for sell.
 62  
 63  The function returns a float that represents the success rate of the strategy.
 64  
 65  """
 66  function up_successrate(df, bcol::Union{Symbol,String}; threshold=0.05)
 67      bcol_v = (x -> circshift(x, 1))(getproperty(df, bcol))
 68      bcol_v[1] = NaN
 69      rate = 0
 70      tv = 1 + threshold
 71      @eachrow df begin
 72          br = bcol_v[row]
 73          rate += convert(Int, Bool(isnan(br) ? false : br) && :high / :open > tv)
 74      end
 75      rate
 76  end
 77  
 78  @doc "Complement of [`up_successrate`](@ref)."
 79  function down_successrate(df, bcol::Union{Symbol,String}; threshold=0.05)
 80      bcol_v = (x -> circshift(x, 1))(getproperty(df, bcol))
 81      bcol_v[1] = NaN
 82      rate = 0
 83      tv = 1 + threshold
 84      @eachrow df begin
 85          br = bcol_v[row]
 86          rate += convert(Int, Bool(isnan(br) ? false : br) && :open / :low > tv)
 87      end
 88      rate
 89  end
 90  
 91  @doc """Identifies support and resistance levels in a DataFrame.
 92  
 93  $(TYPEDSIGNATURES)
 94  
 95  The `supres` function takes the following parameters:
 96  
 97  - `df`: a DataFrame in which to identify support and resistance levels.
 98  - `order` (optional, default is 1): an integer specifying how many points on each side of a point to use for the comparison to consider the point as a support or resistance level. For example, if order=3, a point will be considered a support/resistance level if it has three datapoints in either direction that are smaller/larger than it.
 99  - `threshold` (optional, default is 0.0): a threshold value which the datapoint must exceed to be considered a support or resistance level.
100  - `window` (optional, default is 16): a window size to apply a moving maximum/minimum filter.
101  
102  The function identifies support and resistance levels in the DataFrame `df` based on the specified `order`, `threshold`, and `window`. It then returns a DataFrame with the identified support and resistance levels.
103  
104  """
105  function supres(df; order=1, threshold=0.0, window=16)
106      df[!, :sup] .= NaN
107      df[!, :res] .= NaN
108      dfv = @view df[(window + 2):end, :]
109      price = df.close
110      local prev_r, prev_s
111      @assert window > 15 # use a large enough window size to prevent zero values
112      @eachrow! dfv begin
113          stop = row + window
114          # ensure no lookahead bias
115          @ifdebug @assert df.timestamp[stop] < :timestamp
116          subts = @view price[row:stop]
117          res = resistance(subts; order, threshold)
118          sup = support(subts; order, threshold=-threshold)
119          r = findfirst(isfinite, res)
120          s = findfirst(isfinite, sup)
121          :res = isnothing(r) ? prev_r : prev_r = res[r]
122          :sup = isnothing(s) ? prev_s : prev_s = sup[s]
123          @ifdebug @assert !iszero(prev_r)
124      end
125      df
126  end
127  
@doc """Builds a renko DataFrame out of OHLCV candles.

$(TYPEDSIGNATURES)

- `df`: DataFrame with `open`, `high`, `low`, `close`, `volume` and `timestamp` columns.
- `box_size` (default 10.0): forwarded to `renko`.
- `use_atr` (default false): when true `renko` receives the high/low/close matrix together
  with `use_atr` and `n`; otherwise it receives only the closes and `box_size`.
- `n` (default 14): forwarded to `renko` only when `use_atr` is true.

The rows of `df` are indexed with whatever `renko` returns, keeping the OHLCV columns, and the
whole `df.timestamp` column is then attached as `timestamp`.

Returns the new DataFrame; `df` itself is not modified.
"""
function renkodf(df; box_size=10.0, use_atr=false, n=14)
    rnk_idx = if use_atr
        # NOTE(review): `type` looks unused but is presumably read by `@to_mat` from this
        # scope — confirm against the macro before removing it
        type = Float64
        renko(@to_mat(@view(df[:, [:high, :low, :close]])); box_size, use_atr, n)
    else
        renko(df.close; box_size)
    end
    # can't use view on sub dataframes
    bricks = df[rnk_idx, [:open, :high, :low, :close, :volume]]
    bricks[!, :timestamp] = df.timestamp
    return bricks
end
156  
@doc """Checks whether the most recent renko bricks of `df` form an entry pattern.

- `head` (default 3), `tail` (default 1): both must be positive.
- `long` (default true): selects which colours are expected.
- `kwargs`: forwarded to `renkodf`.

For a long entry the last `tail` bricks must be red (`close <= open`) and the bricks at
offsets `tail:head` from the end must be green (`close > open`); a short entry swaps the two
colours. Returns `false` when `df` is empty or when the renko frame does not have more than
`head + tail` rows.
"""
function isrenkoentry(df::AbstractDataFrame; head=3, tail=1, long=true, kwargs...)
    size(df, 1) < 1 && return false
    rnk = renkodf(df; kwargs...)
    @assert head > 0 && tail > 0
    size(rnk, 1) > head + tail || return false
    # brick colour at offset `k` from the end; kept as two predicates rather than a
    # negation so incomparable values (NaN) match neither colour
    isred(k) = rnk.close[end - k] <= rnk.open[end - k]
    isgreen(k) = rnk.close[end - k] > rnk.open[end - k]
    # long: red pullback after a green run; short: the opposite
    tailcolor, headcolor = long ? (isred, isgreen) : (isgreen, isred)
    return all(tailcolor, 0:(tail - 1)) && all(headcolor, tail:head)
end
178  
@doc """
    isrenkoentry(data::AbstractDict; kwargs...)

Lists the entries of `data` whose candles currently show a renko entry pattern.

- `data`: dictionary whose values expose a `data` field (the candles) and a `name` field.
  The keys are not used.
- `kwargs`: forwarded to the `isrenkoentry` method that handles each value's `data`.

Returns a vector holding the `name` of every value for which the check succeeded, in the
iteration order of `data`. The vector is empty when nothing matches.
"""
function isrenkoentry(data::AbstractDict; kwargs...)
    # collect names rather than booleans: the result says *which* entries matched
    return [p.name for p in values(data) if isrenkoentry(p.data; kwargs...)]
end
200  
@doc """Searches a grid of renko parameters for combinations that signal an entry.

$(TYPEDSIGNATURES)

- `data`: an AbstractDataFrame of candles, as accepted by `isrenkoentry`.
- `head_range` (default 1:10): values tried for `head`.
- `tail_range` (default 1:3): values tried for `tail`.
- `n_range` (default 10:10:200): values tried for `n`, the ATR period used by `renkodf`.
- `use_atr` (default true): forwarded to `renkodf`. `n` only has an effect when this is true;
  pass `use_atr=false` to get the fixed box size for every `n`.
- `kwargs`: any other keyword (e.g. `long`, `box_size`) is forwarded to `isrenkoentry`.

Every combination is tested with `isrenkoentry`, with `head` varying slowest and `n` fastest.

Returns a vector of `(; head, tail, n)` named tuples, one per combination that matched.
"""
function gridrenko(
    data::AbstractDataFrame;
    head_range=1:10,
    tail_range=1:3,
    n_range=10:10:200,
    use_atr=true,
    kwargs...,
)
    Trial = NamedTuple{
        (:head, :tail, :n),Tuple{eltype(head_range),eltype(tail_range),eltype(n_range)}
    }
    out = Trial[]
    for head in head_range, tail in tail_range, n in n_range
        if isrenkoentry(data; head, tail, n, use_atr, kwargs...)
            push!(out, (; head, tail, n))
        end
    end
    return out
end
227  
@doc """[`gridrenko`](@ref) over a dict of `PairData`.

Each value's `data` is searched with the remaining `kwargs`; values with at least one match
are stored under their `name`. Returns that dictionary, or, when `as_df` is true, a DataFrame
built from all matches concatenated together.
"""
function gridrenko(data::AbstractDict; as_df=false, kwargs...)
    out = Dict()
    for p in values(data)
        trials = gridrenko(p.data; kwargs...)
        # entries without any match are left out
        isempty(trials) || (out[p.name] = trials)
    end
    return as_df ? DataFrame(vcat(values(out)...)) : out
end
238  
@doc """Adds Bollinger Bands columns to a DataFrame.

$(TYPEDSIGNATURES)

- `df`: the DataFrame to extend; it is modified in place.
- `kwargs`: forwarded to `bbands`.

The three columns returned by `bbands(df; kwargs...)` are stored, in order, as `bb_low`,
`bb_mid` and `bb_high`. Columns that already exist are replaced, the others are appended.

Returns `df`.
"""
function bbands!(df::AbstractDataFrame; kwargs...)
    bb = bbands(df; kwargs...)
    for (n, col) in enumerate((:bb_low, :bb_mid, :bb_high))
        # column-wise assignment replaces or creates the column, and does not depend on
        # the names `bbands` gave to its own columns
        df[!, col] = bb[:, n]
    end
    return df
end
265  
266  using Base.Iterators: countfrom, take
267  using Base.Threads: @spawn
268  using OnlineTechnicalIndicators: BB, OnlineTechnicalIndicators as oti
# Type of a plain floating point literal such as `0.0`.
const Float = Float64
270  
@doc """Computes Bollinger Bands over the `close` column of a DataFrame.

- `df`: DataFrame with a `close` column.
- `period` (positional, default `oti.BB_PERIOD`) or, equivalently, the keyword `n`: the
  period given to the indicator. The keyword wins when both are supplied.
- `sigma` (default `oti.BB_STD_DEV_MULT`): the standard deviation multiplier.

The closes are fed one at a time to an `oti.BB` indicator. Rows for which the indicator
reports no value are `missing` in all three columns.

Returns a new DataFrame with the columns `lower`, `central` and `upper`, one row per row of `df`.
"""
function bbands(
    df::AbstractDataFrame, period=oti.BB_PERIOD; n=period, sigma=oti.BB_STD_DEV_MULT
)
    o = oti.BB{DFT}(; period=n, std_dev_mult=sigma)
    nrows = size(df, 1)
    bb_low = sizehint!(Union{Missing,DFT}[], nrows)
    bb_mid = sizehint!(Union{Missing,DFT}[], nrows)
    bb_high = sizehint!(Union{Missing,DFT}[], nrows)
    for price in df.close
        oti.fit!(o, price)
        v = o.value
        if ismissing(v)
            push!(bb_low, missing)
            push!(bb_mid, missing)
            push!(bb_high, missing)
        else
            push!(bb_low, v.lower)
            push!(bb_mid, v.central)
            push!(bb_high, v.upper)
        end
    end
    return DataFrame(; lower=bb_low, central=bb_mid, upper=bb_high)
end
291  
@doc """Computes Bollinger Bands over a grid of periods and multipliers.

$(TYPEDSIGNATURES)

- `df`: an AbstractDataFrame with `low`, `close` and `high` columns.
- `n_range` (default 2:2:100): periods to try. When it is a `UnitRange` or a `StepRange` its
  upper end is clipped to one less than the number of rows of `df`.
- `sigma_range` (default [1.0]): standard deviation multipliers to try.
- `corr` (default :corke): either a function, or the name of a function defined in this
  module, called as `corr(band, prices)`. Any other value (e.g. `nothing`) disables the
  correlations, which are then reported as `nothing`.

Every `(n, sigma)` pair is computed in its own task. Each band is correlated, from row `n`
onwards, with the matching price column: lower with `low`, central with `close`, upper with
`high`.

Returns a tuple `(bands, summary)`: `bands` maps `(; n, sigma)` to the bands DataFrame (empty
results are left out) and `summary` is a DataFrame with the columns `n`, `sigma`,
`bb_low_corr`, `bb_mid_corr` and `bb_high_corr`, one row per pair in grid order (`n` varying
slowest).
"""
function gridbbands(df::AbstractDataFrame; n_range=2:2:100, sigma_range=[1.0], corr=:corke)
    # a band needs fewer periods than there are candles: clip the upper end of the grid
    maxn = size(df, 1) - 1
    if n_range isa UnitRange
        n_range = first(n_range):min(maxn, last(n_range))
    elseif n_range isa StepRange
        n_range = first(n_range):step(n_range):min(maxn, last(n_range))
    end
    # look the name up in this module; an unknown name still raises UndefVarError
    corfn = corr isa Symbol ? getproperty(@__MODULE__, corr) : corr
    # band column paired with the price column it is correlated with
    bands_vs_prices = ((1, :low), (2, :close), (3, :high))
    function correlations(n, bb)
        corfn isa Function ||
            return (; bb_low_corr=nothing, bb_mid_corr=nothing, bb_high_corr=nothing)
        lo, mid, hi = map(bands_vs_prices) do (band, price)
            corfn(@view(bb[:, band][n:end]), @view(getproperty(df, price)[n:end]))
        end
        return (; bb_low_corr=lo, bb_mid_corr=mid, bb_high_corr=hi)
    end
    grid = [(; n, sigma) for n in n_range for sigma in sigma_range]
    # each task returns its own result, so no shared state (and no lock) is involved
    tasks = map(grid) do p
        Threads.@spawn begin
            bb = bbands(df, p.n; sigma=p.sigma)
            (bb, correlations(p.n, bb))
        end
    end
    # fetching in grid order keeps the output independent of task scheduling
    results = map(fetch, tasks)
    out = Dict(p => bb for (p, (bb, _)) in zip(grid, results) if size(bb, 1) > 0)
    rows = [merge(p, co) for (p, (_, co)) in zip(grid, results)]
    return out, DataFrame(rows)
end
349  
@doc """Early-exit guard for functions that need more than `n` rows.

Expands, at the call site, to a statement that makes the enclosing function return `false`
unless the first dimension of the table is greater than `n`. The table is the expression
given as argument, or the caller's `ohlcv` variable when no argument is given. `n` is always
the caller's variable of that name, so the caller has to define it.
"""
macro checksize(data=nothing)
    target = esc(data === nothing ? :ohlcv : data)
    periods = esc(:n)
    return :(size($target, 1) <= $periods && return false)
end
357  
@doc """Tells whether the last close sits above the upper Bollinger Band.

$(TYPEDSIGNATURES)

- `ohlcv`: a DataFrame with a `close` column.
- `thresh` (default 0.05): how far above the band, as a fraction, the close must be.
- `n` (default 26): the Bollinger Bands period; `ohlcv` must have more than `n` rows.

Returns `false` when `ohlcv` has `n` rows or fewer. Otherwise returns whether the last close
divided by the last upper band value (third column of `bbands`) is greater than `1 + thresh`.
"""
function is_peaked(ohlcv::DataFrame; thresh=0.05, n=26)
    @checksize
    # the period is the positional argument of `bbands`
    bb = bbands(ohlcv, n)
    return ohlcv.close[end] / bb[end, 3] > 1 + thresh
end
373  
@doc """Same check as `is_peaked(ohlcv)`, using bands that were already computed.

- `bb`: array whose third column holds the upper band, aligned with the rows of `ohlcv`.
- `thresh` (default 0.05): how far above the band, as a fraction, the close must be.
- `n` (default 26): minimum history; `false` is returned when `ohlcv` has `n` rows or fewer.
  It should match the period `bb` was computed with.
"""
function is_peaked(ohlcv::DataFrame, bb::AbstractArray; thresh=0.05, n=26)
    # `@checksize` reads `n` from this scope, hence the keyword
    @checksize
    return ohlcv.close[end] / bb[end, 3] > 1 + thresh
end
378  
@doc """Tells whether the last close sits near or below the lower Bollinger Band.

$(TYPEDSIGNATURES)

- `ohlcv`: a DataFrame with a `close` column.
- `thresh` (default 0.05): tolerance, as a fraction, above the lower band.
- `n` (default 26): the Bollinger Bands period; `ohlcv` must have more than `n` rows.

Returns `false` when `ohlcv` has `n` rows or fewer. Otherwise returns whether the last close
divided by the last lower band value (first column of `bbands`) is less than `1 + thresh`.
"""
function is_bottomed(ohlcv::DataFrame; thresh=0.05, n=26)
    @checksize
    # the period is the positional argument of `bbands`
    bb = bbands(ohlcv, n)
    return ohlcv.close[end] / bb[end, 1] < 1 + thresh
end
398  
@doc """Same check as `is_bottomed(ohlcv)`, using bands that were already computed.

- `bb`: array whose first column holds the lower band, aligned with the rows of `ohlcv`.
- `thresh` (default 0.05): tolerance, as a fraction, above the lower band.
- `n` (default 26): minimum history; `false` is returned when `ohlcv` has `n` rows or fewer.
  It should match the period `bb` was computed with.
"""
function is_bottomed(ohlcv::DataFrame, bb::AbstractArray; thresh=0.05, n=26)
    # `@checksize` reads `n` from this scope, hence the keyword
    @checksize
    return ohlcv.close[end] / bb[end, 1] < 1 + thresh
end
403  
@doc """Tells whether the `n`-period momentum of the close exceeds a threshold.

$(TYPEDSIGNATURES)

- `ohlcv`: a DataFrame with a `close` column.
- `thresh` (default 0.05): value the momentum has to exceed.
- `n` (default 26): the momentum lag; `ohlcv` must have more than `n` rows.

Returns `false` when `ohlcv` has `n` rows or fewer. Otherwise `momentum` is evaluated over the
last `n + 1` closes and its final value is compared with `thresh`.
"""
function is_uptrend(ohlcv::DataFrame; thresh=0.05, n=26)
    @checksize
    # the last `n + 1` closes are exactly what one `n`-lagged value needs
    recent = @view(ohlcv.close[(end - n):end])
    return last(momentum(recent; n)) > thresh
end
420  
421  # function is_lowvol(ohlcv::DataFrame; thresh=0.05, n=3) end
422  
"""
    diffn(x::AbstractVector{<:Real}; n::Int=1)
    diffn(X::AbstractMatrix; n::Int=1)

Lagged differencing.

For a vector, element `i` of the result is `x[i] - x[i - n]`; the first `n` elements, which
have no lagged counterpart, are `NaN`. The result always has a floating point element type
(`float(eltype(x))`), so integer input is accepted. `n` must satisfy `0 < n < length(x)`,
otherwise an `AssertionError` is raised. `x` is assumed to use 1-based indexing.

For a matrix the differencing is applied to every column independently.
"""
function diffn(x::AbstractVector{T}; n::Int=1)::Vector{float(T)} where {T<:Real}
    @assert n<size(x,1) && n>0 "Argument n out of bounds."
    # a float buffer is needed to hold the NaN padding whatever the input type
    dx = Vector{float(T)}(undef, length(x))
    dx[1:n] .= NaN
    for i in (n + 1):length(x)
        dx[i] = x[i] - x[i - n]
    end
    return dx
end
function diffn(X::AbstractMatrix; n::Int=1)::Matrix
    out = Matrix{float(eltype(X))}(undef, size(X))
    # column views avoid copying each column before it is differenced
    for (j, col) in enumerate(eachcol(X))
        out[:, j] = diffn(col; n)
    end
    return out
end
"""
    momentum(x::AbstractArray{<:Real}; n=1)

`n`-period momentum of `x`, i.e. the lagged difference computed by `diffn`, converted to an
array of `Float64`. Raises an `AssertionError` when `n` is not positive.
"""
function momentum(x::AbstractArray{T}; n::Int64=1)::Array{Float64} where {T<:Real}
    n > 0 || throw(AssertionError("Argument n must be positive."))
    return diffn(x; n=n)
end
445  
446  include("slope.jl")