# common/json_schema_utils.rb
  1  module JSONSchemaUtils
  2  
  # Join a validator fragment (e.g. "#/dates/0") and an optional property
  # name into a single slash-separated path with no leading "#/".
  #
  # fragment - the fragment String; nil is treated as the empty fragment.
  # property - optional property name; a leading "#/" is stripped from it too.
  #
  # Returns a String.  Exactly one "/" separates the two parts, and no
  # separator is added when the fragment is empty (the document root).
  def self.fragment_join(fragment, property = nil)
    # \A anchors at the start of the string; only a leading "#/" is a prefix.
    fragment = fragment.to_s.sub(/\A#\//, "")
    property = property.to_s.sub(/\A#\//, "") if property

    if property && !fragment.empty? && !fragment.end_with?("/")
      fragment = "#{fragment}/"
    end

    "#{fragment}#{property}"
  end
 13  
 14  
 15    def self.schema_path_lookup(schema, path)
 16      if path.is_a? String
 17        return self.schema_path_lookup(schema, path.split("/"))
 18      end
 19  
 20      if schema.has_key?('properties')
 21        schema = schema['properties']
 22      end
 23  
 24      if path.length == 1
 25        schema[path.first]
 26      else
 27        if schema[path.first]
 28          self.schema_path_lookup(schema[path.first], path.drop(1))
 29        else
 30          nil
 31        end
 32      end
 33    end
 34  
 35  
 36  
 37    SCHEMA_PARSE_RULES =
 38      [
 39       {
 40         :failed_attribute => ['Properties', 'IfMissing', 'ArchivesSpaceSubType'],
 41         :pattern => /([A-Z]+: )?The property '.*?' did not contain a required property of '(.*?)'.*/,
 42         :do => ->(msgs, message, path, type, property) {
 43           if type && type =~ /ERROR/
 44             msgs[:errors][fragment_join(path, property)] = ["Property is required but was missing"]
 45           else
 46             msgs[:warnings][fragment_join(path, property)] = ["Property was missing"]
 47           end
 48         }
 49       },
 50  
 51       {
 52         :failed_attribute => ['ArchivesSpaceType'],
 53         :pattern => /The property '#(.*?)' was not a well-formed date/,
 54         :do => ->(msgs, message, path, property) {
 55           msgs[:errors][fragment_join(path)] = ["Not a valid date"]
 56         }
 57       },
 58  
 59       {
 60         :failed_attribute => ['Pattern'],
 61         :pattern => /The property '#\/.*?' value "(.*?)" did not match the regex '(.*?)' in schema/,
 62         :do => ->(msgs, message, path, value, regexp) {
 63           msgs[:errors][fragment_join(path)] = ["Value '#{value}' did not match regular expression: #{regexp}"]
 64         }
 65       },
 66  
 67       {
 68         :failed_attribute => ['MinLength'],
 69         :pattern => /The property '#\/.*?' was not of a minimum string length of ([0-9]+) in schema/,
 70         :do => ->(msgs, message, path, length) {
 71           msgs[:errors][fragment_join(path)] = ["Must be at least #{length} characters"]
 72         }
 73       },
 74  
 75       {
 76         :failed_attribute => ['MaxLength'],
 77         :pattern => /The property '#\/.*?' was not of a maximum string length of ([0-9]+) in schema/,
 78         :do => ->(msgs, message, path, length) {
 79           msgs[:errors][fragment_join(path)] = ["Must be #{length} characters or fewer"]
 80         }
 81       },
 82  
 83       {
 84         :failed_attribute => ['MinItems'],
 85         :pattern => /The property '#\/.*?' did not contain a minimum number of items ([0-9]+) in schema/,
 86         :do => ->(msgs, message, path, items) {
 87           msgs[:errors][fragment_join(path)] = ["At least #{items} item(s) is required"]
 88         }
 89       },
 90  
 91       {
 92         :failed_attribute => ['Enum'],
 93         :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
 94         :do => ->(msgs, message, path, invalid, valid_set) {
 95           msgs[:errors][fragment_join(path)] = ["Invalid value '#{invalid}'.  Must be one of: #{valid_set}"]
 96         }
 97       },
 98  
 99       {
100         :failed_attribute => ['ArchivesSpaceDynamicEnum'],
101         :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
102         :do => ->(msgs, message, path, invalid, valid_set) {
103           msgs[:attribute_types][fragment_join(path)] = 'ArchivesSpaceDynamicEnum'
104           msgs[:errors][fragment_join(path)] = ["Invalid value '#{invalid}'.  Must be one of: #{valid_set}"]
105         }
106       },
107       {
108         :failed_attribute => ['ArchivesSpaceReadOnlyDynamicEnum'],
109         :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
110         :do => ->(msgs, message, path, invalid, valid_set) {
111           msgs[:attribute_types][fragment_join(path)] = 'ArchivesSpaceReadOnlyDynamicEnum'
112           msgs[:errors][fragment_join(path)] = ["Protected read-only list #{path}. Invalid value '#{invalid}'.  Must be one of: #{valid_set}"]
113         }
114       },
115  
116       {
117         :failed_attribute => ['Type', 'ArchivesSpaceType'],
118         :pattern => /The property '#\/.*?' of type (.*?) did not match the following type: (.*?) in schema/,
119         :do => ->(msgs, message, path, actual_type, desired_type) {
120           if actual_type !~ /JSONModel/ || message[:failed_attribute] == 'ArchivesSpaceType'
121             # We'll skip JSONModels because the specific problem with the
122             # document will have already been listed separately.
123  
124             msgs[:state][fragment_join(path)] ||= []
125             msgs[:state][fragment_join(path)] << desired_type
126  
127             if msgs[:state][fragment_join(path)].length == 1
128               msgs[:errors][fragment_join(path)] = ["Must be a #{desired_type} (you provided a #{actual_type})"]
129               # a little better messages for malformed uri
130               if desired_type =~ /uri$/
131                 msgs[:errors][fragment_join(path)].first << " (malformed or invalid uri? check if referenced object exists.)"
132               end
133             else
134               msgs[:errors][fragment_join(path)] = ["Must be one of: #{msgs[:state][fragment_join(path)].join (", ")} (you provided a #{actual_type})"]
135             end
136           end
137  
138         }
139       },
140  
141       {
142         :failed_attribute => ['custom_validation'],
143         :pattern => /Validation failed for '(.*?)': (.*?) in schema /,
144         :do => ->(msgs, message, path, property, msg) {
145           property = (property && !property.empty?) ? property : nil
146           msgs[:errors][fragment_join(path, property)] = [msg]
147         }
148       },
149  
150       {
151         :failed_attribute => ['custom_validation'],
152         :pattern => /Warning generated for '(.*?)': (.*?) in schema /,
153         :do => ->(msgs, message, path, property, msg) {
154           msgs[:warnings][fragment_join(path, property)] = [msg]
155         }
156       },
157  
158       {
159         :failed_attribute => ['custom_validation'],
160         :pattern => /Validation error code: (.*?) in schema /,
161         :do => ->(msgs, message, path, error_code) {
162           msgs[:errors]['coded_errors'] = [error_code]
163         }
164       },
165  
166  
167       # Catch all
168       {
169         :failed_attribute => nil,
170         :pattern => /^(.*)$/,
171         :do => ->(msgs, message, path, msg) {
172           msgs[:errors]['unknown'] = [msg]
173         }
174       }
175      ]
176  
177  
178    # For a given error, find its list of sub errors.
179    def self.extract_suberrors(errors)
180      errors = Array[errors].flatten
181  
182      result = errors.map do |error|
183        if !error[:errors]
184          error
185        else
186          self.extract_suberrors(error[:errors])
187        end
188      end
189  
190      result.flatten
191    end
192  
193  
194    # Given a list of error messages produced by JSON schema validation, parse
195    # them into a structured format like:
196    #
197    # {
198    #   :errors => {:attr1 => "(What was wrong with attr1)"},
199    #   :warnings => {:attr2 => "(attr2 not quite right either)"}
200    # }
201    def self.parse_schema_messages(messages, validator)
202      messages = self.extract_suberrors(messages)
203  
204      msgs = {
205        :errors => {},
206        :warnings => {},
207        # to lookup e.g., msgs[:attribute_types]['extents/0/extent_type'] => 'ArchivesSpaceDynamicEnum'
208        :attribute_types => {},
209        :state => {}              # give the parse rules somewhere to store useful state for a run
210      }
211  
212      messages.each do |message|
213  
214        SCHEMA_PARSE_RULES.each do |rule|
215          if (rule[:failed_attribute].nil? || rule[:failed_attribute].include?(message[:failed_attribute])) and
216              message[:message] =~ rule[:pattern]
217  
218            rule[:do].call(msgs, message, message[:fragment],
219                           *message[:message].scan(rule[:pattern]).flatten)
220  
221            break
222          end
223        end
224  
225      end
226  
227      msgs.delete(:state)
228      msgs
229    end
230  
231  
232    # Given a hash representing a record tree, map across the hash and this
233    # model's schema in lockstep.
234    #
235    # Each proc in the 'transformations' array is called with the current node
236    # in the record tree as its first argument, and the part of the schema
237    # that corresponds to it.  Whatever the proc returns is used to replace
238    # the node in the record tree.
239    #
240    def self.map_hash_with_schema(record, schema, transformations = [])
241      return record if not record.is_a?(Hash)
242  
243      if schema.is_a?(String)
244        schema = resolve_schema_reference(schema)
245      end
246  
247      # Sometimes a schema won't specify anything other than the required type
248      # (like {'type' => 'object'}).  If there's nothing more to check, we're
249      # done.
250      return record if !schema.has_key?("properties")
251  
252  
253      # Apply transformations to the current level of the tree
254      transformations.each do |transform|
255        record = transform.call(record, schema)
256      end
257  
258      # Now figure out how to traverse the remainder of the tree...
259      result = {}
260  
261      record.each do |k, v|
262        k = k.to_s
263        properties = schema['properties']
264  
265        if properties.has_key?(k) && (properties[k]["type"] == "object")
266          result[k] = self.map_hash_with_schema(v, properties[k], transformations)
267  
268        elsif v.is_a?(Array) && properties.has_key?(k) && (properties[k]["type"] == "array")
269  
270          # Arrays are tricky because they can either consist of a single type, or
271          # a number of different types.
272  
273          if properties[k]["items"]["type"].is_a?(Array)
274            result[k] = v.map {|elt|
275  
276              if elt.is_a?(Hash)
277                next_schema = determine_schema_for(elt, properties[k]["items"]["type"])
278                self.map_hash_with_schema(elt, next_schema, transformations)
279              elsif elt.is_a?(Array)
280                raise "Nested arrays aren't supported here (yet)"
281              else
282                elt
283              end
284            }
285  
286          # The array contains a single type of object
287          elsif properties[k]["items"]["type"] === "object"
288            result[k] = v.map {|elt| self.map_hash_with_schema(elt, properties[k]["items"], transformations)}
289          else
290            # Just one valid type
291            result[k] = v.map {|elt| self.map_hash_with_schema(elt, properties[k]["items"]["type"], transformations)}
292          end
293  
294        elsif (v.is_a?(Hash) || v.is_a?(Array)) && (properties.has_key?(k) && properties[k]["type"].is_a?(Array))
295          # Multiple possible types for this single value
296  
297          results = (v.is_a?(Array) ? v : [v]).map {|elt|
298            next_schema = determine_schema_for(elt, properties[k]["type"])
299            self.map_hash_with_schema(elt, next_schema, transformations)
300          }
301  
302          result[k] = v.is_a?(Array) ? results : results[0]
303  
304        elsif properties.has_key?(k) && JSONModel.parse_jsonmodel_ref(properties[k]["type"])
305          result[k] = self.map_hash_with_schema(v, properties[k]["type"], transformations)
306        else
307          result[k] = v
308        end
309      end
310  
311      result
312    end
313  
314    def self.blank?(obj)
315      obj.nil? || obj == '' || obj == {}
316    end
317  
318    # Recursively walk a map and remove any empty strings, empty maps and nils.
319    # Recursively collapses elements so that if, for example, an map becomes
320    # empty after having its own empty elements removed, it gets removed as well.
321    def self.drop_empty_elements(obj)
322      queue = [obj]
323      to_visit = []
324  
325      while !queue.empty?
326        obj = queue.shift
327  
328        if obj.is_a?(Hash) || obj.is_a?(Array)
329          (obj.is_a?(Hash) ? obj.values : obj).each do |v|
330            if v.is_a?(Hash) || v.is_a?(Array)
331              queue.push(v)
332            end
333          end
334        end
335  
336        to_visit.unshift(obj)
337      end
338  
339      while !to_visit.empty?
340        obj = to_visit.shift
341  
342        if obj.is_a?(Array)
343          obj.reject! {|elt| blank?(elt)}
344        elsif obj.is_a?(Hash)
345          obj.keys.each do |k|
346            if blank?(obj[k])
347              obj.delete(k)
348            end
349          end
350        end
351      end
352  
353      obj
354    end
355  
356    # Drop any keys from 'hash' that aren't defined in the JSON schema.
357    #
358    # If drop_readonly is true, also drop any values where the schema has
359    # 'readonly' set to true.  These values are produced by the system for the
360    # client, but are not part of the data model.
361    #
362    def self.drop_unknown_properties(hash, schema, drop_readonly = false)
363      fn = proc do |hash, schema|
364        result = {}
365  
366        hash.each do |k, v|
367          if schema["properties"].has_key?(k.to_s) && (!drop_readonly || !schema["properties"][k.to_s]["readonly"])
368            result[k] = v
369          end
370        end
371  
372        result
373      end
374  
375      hash = drop_empty_elements(hash)
376      map_hash_with_schema(hash, schema, [fn])
377    end
378  
379  
380    def self.apply_schema_defaults(hash, schema)
381      fn = proc do |hash, schema|
382        result = hash.clone
383  
384        schema["properties"].each do |property, definition|
385  
386          if definition.has_key?("default") && !hash.has_key?(property.to_s) && !hash.has_key?(property.intern)
387            result[property] = definition["default"]
388          elsif definition['type'] == 'array' && !hash.has_key?(property.to_s) && !hash.has_key?(property.intern)
389            # Array values that weren't provided default to empty
390            result[property] = []
391          end
392  
393        end
394  
395        result
396      end
397  
398      map_hash_with_schema(hash, schema, [fn])
399    end
400  
401  
402    private
403  
404    def self.resolve_schema_reference(schema_reference)
405      # This should be a reference to a different JSONModel type.  Resolve it
406      # and return its schema.
407      ref = JSONModel.parse_jsonmodel_ref(schema_reference)
408      raise "Invalid schema given: #{schema_reference}" if !ref
409  
410      JSONModel.JSONModel(ref[0]).schema
411    end
412  
413  
414    def self.determine_schema_for(elt, possible_schemas)
415      # A number of different types.  Match them up based on the value of the 'jsonmodel_type' property
416      schema_types = possible_schemas.map {|schema| schema.is_a?(Hash) ? schema["type"] : schema}
417  
418      jsonmodel_type = elt["jsonmodel_type"] || elt[:jsonmodel_type]
419  
420      if !jsonmodel_type
421        raise JSONModel::ValidationException.new(:errors => {"record" => ["Can't unambiguously match #{elt.inspect} against schema types: #{schema_types.inspect}. " +
422                                                               "Resolve this by adding a 'jsonmodel_type' property to #{elt.inspect}"]})
423      end
424  
425      next_schema = schema_types.find {|type|
426        (type.is_a?(String) && type.include?("JSONModel(:#{jsonmodel_type})")) ||
427        (type.is_a?(Hash) && type["jsonmodel_type"] === jsonmodel_type)
428      }
429  
430      if next_schema.nil?
431        raise "Couldn't determine type of '#{elt.inspect}'.  Must be one of: #{schema_types.inspect}"
432      end
433  
434      next_schema
435    end
436  
437  
438  end