metadata.patch
1 diff --git a/src/Filters.jl b/src/Filters.jl 2 index 2666bdf..2951e4e 100644 3 --- a/src/Filters.jl 4 +++ b/src/Filters.jl 5 @@ -27,18 +27,21 @@ Encodes and decodes variable-length arrays of arbitrary data type 6 """ 7 struct VLenArrayFilter{T} <: Filter{T,UInt8} end 8 9 -function zdecode(ain, ::VLenArrayFilter{T}) where T 10 +function _zdecode(ain, E::Type) 11 f = IOBuffer(ain) 12 nitems = read(f, UInt32) 13 - out = Array{Vector{T}}(undef,nitems) 14 + out = Array{Vector{E}}(undef,nitems) 15 for i=1:nitems 16 len1 = read(f,UInt32) 17 - out[i] = read!(f,Array{T}(undef,len1 ÷ sizeof(T))) 18 + out[i] = read!(f,Array{E}(undef,len1 ÷ sizeof(E))) 19 end 20 close(f) 21 out 22 end 23 24 +zdecode(ain, ::VLenArrayFilter{T}) where T <: AbstractArray{E} where E = _zdecode(ain, E) 25 +zdecode(ain, ::VLenArrayFilter{V}) where V = _zdecode(ain, V) 26 + 27 #Encodes Array of Vectors a into bytes 28 function zencode(ain,::VLenArrayFilter) 29 b = IOBuffer() 30 @@ -51,8 +54,46 @@ function zencode(ain,::VLenArrayFilter) 31 take!(b) 32 end 33 34 -JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) ) 35 +JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(eltype(T)) ) 36 + 37 +getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{Vector{typestr(f["dtype"])}}() 38 + 39 +""" 40 + VLenUTF8Filter 41 + 42 +Encodes and decodes variable-length unicode strings 43 +""" 44 +struct VLenUTF8Filter <: Filter{String,UInt8} end 45 + 46 +function zdecode(ain, ::VLenUTF8Filter) 47 + arbuf = UInt8[] 48 + f = IOBuffer(ain) 49 + nitems = read(f, UInt32) 50 + out = Array{String}(undef,nitems) 51 + for i=1:nitems 52 + len1 = read(f,UInt32) 53 + resize!(arbuf,len1) 54 + read!(f,arbuf) 55 + out[i] = String(arbuf) 56 + end 57 + close(f) 58 + out 59 +end 60 + 61 +#Encodes Array of Strings a into bytes 62 +function zencode(ain,::VLenUTF8Filter) 63 + b = IOBuffer() 64 + nitems = length(ain) 65 + write(b,UInt32(nitems)) 66 + for 
a in ain 67 + write(b, UInt32(sizeof(a))) 68 + write(b, a) 69 + end 70 + take!(b) 71 +end 72 + 73 +JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8","dtype"=> "|O" ) 74 75 -getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}() 76 +getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter() 77 78 -filterdict = Dict("vlen-array"=>VLenArrayFilter) 79 \ No newline at end of file 80 +const filterdict = Dict("vlen-array"=>VLenArrayFilter, "vlen-utf8"=>VLenUTF8Filter) 81 diff --git a/src/ZArray.jl b/src/ZArray.jl 82 index 4e7b300..c41dd96 100644 83 --- a/src/ZArray.jl 84 +++ b/src/ZArray.jl 85 @@ -135,11 +135,7 @@ function getchunkarray(z::ZArray{>:Missing}) 86 inner = fill(z.metadata.fill_value, z.metadata.chunks) 87 a = SenMissArray(inner,z.metadata.fill_value) 88 end 89 -_zero(T) = zero(T) 90 -_zero(T::Type{<:MaxLengthString}) = T("") 91 -_zero(T::Type{ASCIIChar}) = ASCIIChar(0) 92 -_zero(::Type{<:Vector{T}}) where T = T[] 93 -getchunkarray(z::ZArray) = fill(_zero(eltype(z)), z.metadata.chunks) 94 +getchunkarray(z::ZArray) = Array{eltype(z)}(undef, z.metadata.chunks...) 95 96 maybeinner(a::Array) = a 97 maybeinner(a::SenMissArray) = a.x 98 @@ -254,6 +250,10 @@ Read the chunk specified by `i` from the Zarray `z` and write its content to `a` 99 """ 100 function uncompress_raw!(a,z::ZArray{<:Any,N},curchunk) where N 101 if curchunk === nothing 102 + @assert eltype(a) == typeof(z.metadata.fill_value) "Type mismatch \ 103 + between array element of type '$(eltype(a))' \ 104 + and default value of type '$(typeof(z.metadata.fill_value))'. \ 105 + Use a different default value." 
106 fill!(a, z.metadata.fill_value) 107 else 108 zuncompress!(a, curchunk, z.metadata.compressor, z.metadata.filters) 109 @@ -355,6 +355,7 @@ function zcreate(::Type{T},storage::AbstractStore, 110 end 111 112 filterfromtype(::Type{<:Any}) = nothing 113 +filterfromtype(::Type{<:AbstractString}) = (VLenUTF8Filter(),) 114 115 function filterfromtype(::Type{<:AbstractArray{T}}) where T 116 #Here we have to apply the vlenarray filter 117 diff --git a/src/ZGroup.jl b/src/ZGroup.jl 118 index 8bc54be..6d92548 100644 119 --- a/src/ZGroup.jl 120 +++ b/src/ZGroup.jl 121 @@ -19,6 +19,10 @@ function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: Abstract 122 groups = Dict{String, ZGroup}() 123 124 for d in subdirs(s,path) 125 + @debug if d == path 126 + @warn "Store is corrupted, probably has keys starting with '/' (it should not!)." 127 + continue 128 + end 129 dshort = split(d,'/')[end] 130 m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) 131 if isa(m, ZArray) 132 diff --git a/src/metadata.jl b/src/metadata.jl 133 index ae50634..7a8b0b9 100644 134 --- a/src/metadata.jl 135 +++ b/src/metadata.jl 136 @@ -53,6 +53,7 @@ Base.zero(t::Union{DateTime64, Type{<:DateTime64}}) = t(0) 137 138 139 typestr(t::Type) = string('<', 'V', sizeof(t)) 140 +typestr(t::Type{<:AbstractString}) = string('<', 'O') 141 typestr(t::Type{>:Missing}) = typestr(Base.nonmissingtype(t)) 142 typestr(t::Type{Bool}) = string('<', 'b', sizeof(t)) 143 typestr(t::Type{<:Signed}) = string('<', 'i', sizeof(t)) 144 @@ -63,6 +64,7 @@ typestr(::Type{MaxLengthString{N,UInt32}}) where N = string('<', 'U', N) 145 typestr(::Type{MaxLengthString{N,UInt8}}) where N = string('<', 'S', N) 146 typestr(::Type{<:Array}) = "|O" 147 typestr(::Type{<:DateTime64{P}}) where P = "<M8[$(pdt64string[P])]" 148 +typestr(t::Type{Union{Nothing, T}}) where T = typestr(T) 149 150 const typestr_regex = r"^([<|>])([tbiufcmMOSUV])(\d*)(\[\w+\])?$" 151 const typemap = Dict{Tuple{Char, Int}, 
DataType}( 152 @@ -96,7 +98,7 @@ function typestr(s::AbstractString, filterlist=nothing) 153 if filterlist === nothing 154 throw(ArgumentError("Object array can only be parsed when an appropriate filter is defined")) 155 end 156 - return Vector{sourcetype(first(filterlist))} 157 + return sourcetype(first(filterlist)) 158 end 159 isempty(typesize) && throw((ArgumentError("$s is not a valid numpy typestr"))) 160 tc, ts = first(typecode), parse(Int, typesize)