/ vendor / metadata.patch
metadata.patch
  1  diff --git a/src/Filters.jl b/src/Filters.jl
  2  index 2666bdf..2951e4e 100644
  3  --- a/src/Filters.jl
  4  +++ b/src/Filters.jl
  5  @@ -27,18 +27,21 @@ Encodes and decodes variable-length arrays of arbitrary data type
  6   """
  7   struct VLenArrayFilter{T} <: Filter{T,UInt8} end
  8   
  9  -function zdecode(ain, ::VLenArrayFilter{T}) where T
 10  +function _zdecode(ain, E::Type)
 11       f = IOBuffer(ain)
 12       nitems = read(f, UInt32)
 13  -    out = Array{Vector{T}}(undef,nitems)
 14  +    out = Array{Vector{E}}(undef,nitems)
 15       for i=1:nitems
 16           len1 = read(f,UInt32)
 17  -        out[i] = read!(f,Array{T}(undef,len1 ÷ sizeof(T)))
 18  +        out[i] = read!(f,Array{E}(undef,len1 ÷ sizeof(E)))
 19       end
 20       close(f)
 21       out
 22   end
 23   
 24  +zdecode(ain, ::VLenArrayFilter{T}) where T <: AbstractArray{E} where E = _zdecode(ain, E)
 25  +zdecode(ain, ::VLenArrayFilter{V}) where V = _zdecode(ain, V)
 26  +
 27   #Encodes Array of Vectors a into bytes
 28   function zencode(ain,::VLenArrayFilter)
 29       b = IOBuffer()
 30  @@ -51,8 +54,46 @@ function zencode(ain,::VLenArrayFilter)
 31       take!(b)
 32   end
 33   
 34  -JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) )
 35  +JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(eltype(T)) )
 36  +
 37  +getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{Vector{typestr(f["dtype"])}}()
 38  +
 39  +"""
 40  +    VLenUTF8Filter
 41  +
 42  +Encodes and decodes arrays of variable-length UTF-8 strings
 43  +"""
 44  +struct VLenUTF8Filter <: Filter{String,UInt8} end
 45  +
 46  +function zdecode(ain, ::VLenUTF8Filter)
 47  +    arbuf = UInt8[]
 48  +    f = IOBuffer(ain)
 49  +    nitems = read(f, UInt32)
 50  +    out = Array{String}(undef,nitems)
 51  +    for i=1:nitems
 52  +        len1 = read(f,UInt32)
 53  +        resize!(arbuf,len1)
 54  +        read!(f,arbuf)
 55  +        out[i] = String(arbuf)
 56  +    end
 57  +    close(f)
 58  +    out
 59  +end
 60  +
 61  +#Encodes an Array of Strings ain into bytes
 62  +function zencode(ain,::VLenUTF8Filter)
 63  +    b = IOBuffer()
 64  +    nitems = length(ain)
 65  +    write(b,UInt32(nitems))
 66  +    for a in ain
 67  +        write(b, UInt32(sizeof(a)))
 68  +        write(b, a)
 69  +    end
 70  +    take!(b)
 71  +end
 72  +
 73  +JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8","dtype"=> "|O" )
 74   
 75  -getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}()
 76  +getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter()
 77   
 78  -filterdict = Dict("vlen-array"=>VLenArrayFilter)
 79  \ No newline at end of file
 80  +const filterdict = Dict("vlen-array"=>VLenArrayFilter, "vlen-utf8"=>VLenUTF8Filter)
 81  diff --git a/src/ZArray.jl b/src/ZArray.jl
 82  index 4e7b300..c41dd96 100644
 83  --- a/src/ZArray.jl
 84  +++ b/src/ZArray.jl
 85  @@ -135,11 +135,7 @@ function getchunkarray(z::ZArray{>:Missing})
 86     inner = fill(z.metadata.fill_value, z.metadata.chunks)
 87     a = SenMissArray(inner,z.metadata.fill_value)
 88   end
 89  -_zero(T) = zero(T)
 90  -_zero(T::Type{<:MaxLengthString}) = T("")
 91  -_zero(T::Type{ASCIIChar}) = ASCIIChar(0)
 92  -_zero(::Type{<:Vector{T}}) where T = T[]
 93  -getchunkarray(z::ZArray) = fill(_zero(eltype(z)), z.metadata.chunks)
 94  +getchunkarray(z::ZArray) = Array{eltype(z)}(undef, z.metadata.chunks...)
 95   
 96   maybeinner(a::Array) = a
 97   maybeinner(a::SenMissArray) = a.x
 98  @@ -254,6 +250,10 @@ Read the chunk specified by `i` from the Zarray `z` and write its content to `a`
 99   """
100   function uncompress_raw!(a,z::ZArray{<:Any,N},curchunk) where N
101     if curchunk === nothing
102  +    @assert eltype(a) == typeof(z.metadata.fill_value) "Type mismatch \
103  +      between array element of type '$(eltype(a))' \
104  +      and default value of type '$(typeof(z.metadata.fill_value))'. \
105  +      Use a different default value."
106       fill!(a, z.metadata.fill_value)
107     else
108       zuncompress!(a, curchunk, z.metadata.compressor, z.metadata.filters)
109  @@ -355,6 +355,7 @@ function zcreate(::Type{T},storage::AbstractStore,
110   end
111   
112   filterfromtype(::Type{<:Any}) = nothing
113  +filterfromtype(::Type{<:AbstractString}) = (VLenUTF8Filter(),)
114   
115   function filterfromtype(::Type{<:AbstractArray{T}}) where T
116     #Here we have to apply the vlenarray filter
117  diff --git a/src/ZGroup.jl b/src/ZGroup.jl
118  index 8bc54be..6d92548 100644
119  --- a/src/ZGroup.jl
120  +++ b/src/ZGroup.jl
121  @@ -19,6 +19,10 @@ function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: Abstract
122     groups = Dict{String, ZGroup}()
123   
124     for d in subdirs(s,path)
125  +    @debug if d == path
126  +      @warn "Store is corrupted, probably has keys starting with '/' (it should not!)."
127  +      continue
128  +    end
129       dshort = split(d,'/')[end]
130       m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing)
131       if isa(m, ZArray)
132  diff --git a/src/metadata.jl b/src/metadata.jl
133  index ae50634..7a8b0b9 100644
134  --- a/src/metadata.jl
135  +++ b/src/metadata.jl
136  @@ -53,6 +53,7 @@ Base.zero(t::Union{DateTime64, Type{<:DateTime64}}) = t(0)
137   
138   
139   typestr(t::Type) = string('<', 'V', sizeof(t))
140  +typestr(t::Type{<:AbstractString}) = string('<', 'O')
141   typestr(t::Type{>:Missing}) = typestr(Base.nonmissingtype(t))
142   typestr(t::Type{Bool}) = string('<', 'b', sizeof(t))
143   typestr(t::Type{<:Signed}) = string('<', 'i', sizeof(t))
144  @@ -63,6 +64,7 @@ typestr(::Type{MaxLengthString{N,UInt32}}) where N = string('<', 'U', N)
145   typestr(::Type{MaxLengthString{N,UInt8}}) where N = string('<', 'S', N)
146   typestr(::Type{<:Array}) = "|O"
147   typestr(::Type{<:DateTime64{P}}) where P = "<M8[$(pdt64string[P])]"
148  +typestr(t::Type{Union{Nothing, T}}) where T = typestr(T)
149   
150   const typestr_regex = r"^([<|>])([tbiufcmMOSUV])(\d*)(\[\w+\])?$"
151   const typemap = Dict{Tuple{Char, Int}, DataType}(
152  @@ -96,7 +98,7 @@ function typestr(s::AbstractString, filterlist=nothing)
153               if filterlist === nothing
154                   throw(ArgumentError("Object array can only be parsed when an appropriate filter is defined"))
155               end
156  -            return Vector{sourcetype(first(filterlist))}
157  +            return sourcetype(first(filterlist))
158           end
159           isempty(typesize) && throw((ArgumentError("$s is not a valid numpy typestr")))
160           tc, ts = first(typecode), parse(Int, typesize)