# json_schema_utils.rb
# Utilities for working with JSON schemas: joining fragment paths, looking up
# sub-schemas, turning raw validator messages into structured errors/warnings,
# and walking a record hash in lockstep with its schema.
module JSONSchemaUtils

  # Join a schema fragment (e.g. "#/notes/0") with an optional property name.
  # Strips any leading "#/" from both parts and inserts a "/" separator when
  # the fragment is non-empty and doesn't already end with one.
  #
  # @param fragment [String] a JSON pointer fragment
  # @param property [String, nil] an optional property name to append
  # @return [String] the joined path, e.g. "notes/0/type"
  def self.fragment_join(fragment, property = nil)
    fragment = fragment.gsub(/^#\//, "")
    property = property.gsub(/^#\//, "") if property

    if property && fragment != "" && fragment !~ /\/$/
      fragment = "#{fragment}/"
    end

    "#{fragment}#{property}"
  end


  # Walk 'schema' along 'path' and return the sub-schema found there.
  #
  # @param schema [Hash] a JSON schema (or a 'properties' sub-hash)
  # @param path [String, Array] either a "/"-delimited string or an array of keys
  # @return [Hash, nil] the sub-schema at that path, or nil if absent
  def self.schema_path_lookup(schema, path)
    if path.is_a? String
      return self.schema_path_lookup(schema, path.split("/"))
    end

    # Descend through the 'properties' wrapper transparently.
    if schema.has_key?('properties')
      schema = schema['properties']
    end

    if path.length == 1
      schema[path.first]
    else
      if schema[path.first]
        self.schema_path_lookup(schema[path.first], path.drop(1))
      else
        nil
      end
    end
  end


  # Rules for turning raw schema-validation messages into structured results.
  # Each rule matches on the validator's failed attribute plus a message
  # pattern; the first matching rule's :do proc is called with the parsed
  # captures and records its result into the msgs accumulator.
  SCHEMA_PARSE_RULES =
    [
     {
       :failed_attribute => ['Properties', 'IfMissing', 'ArchivesSpaceSubType'],
       :pattern => /([A-Z]+: )?The property '.*?' did not contain a required property of '(.*?)'.*/,
       :do => ->(msgs, message, path, type, property) {
         # An "ERROR:"-prefixed message is a hard error; otherwise a warning.
         if type && type =~ /ERROR/
           msgs[:errors][fragment_join(path, property)] = ["Property is required but was missing"]
         else
           msgs[:warnings][fragment_join(path, property)] = ["Property was missing"]
         end
       }
     },

     {
       :failed_attribute => ['ArchivesSpaceType'],
       :pattern => /The property '#(.*?)' was not a well-formed date/,
       :do => ->(msgs, message, path, property) {
         msgs[:errors][fragment_join(path)] = ["Not a valid date"]
       }
     },

     {
       :failed_attribute => ['Pattern'],
       :pattern => /The property '#\/.*?' value "(.*?)" did not match the regex '(.*?)' in schema/,
       :do => ->(msgs, message, path, value, regexp) {
         msgs[:errors][fragment_join(path)] = ["Value '#{value}' did not match regular expression: #{regexp}"]
       }
     },

     {
       :failed_attribute => ['MinLength'],
       :pattern => /The property '#\/.*?' was not of a minimum string length of ([0-9]+) in schema/,
       :do => ->(msgs, message, path, length) {
         msgs[:errors][fragment_join(path)] = ["Must be at least #{length} characters"]
       }
     },

     {
       :failed_attribute => ['MaxLength'],
       :pattern => /The property '#\/.*?' was not of a maximum string length of ([0-9]+) in schema/,
       :do => ->(msgs, message, path, length) {
         msgs[:errors][fragment_join(path)] = ["Must be #{length} characters or fewer"]
       }
     },

     {
       :failed_attribute => ['MinItems'],
       :pattern => /The property '#\/.*?' did not contain a minimum number of items ([0-9]+) in schema/,
       :do => ->(msgs, message, path, items) {
         msgs[:errors][fragment_join(path)] = ["At least #{items} item(s) is required"]
       }
     },

     {
       :failed_attribute => ['Enum'],
       :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
       :do => ->(msgs, message, path, invalid, valid_set) {
         msgs[:errors][fragment_join(path)] = ["Invalid value '#{invalid}'. Must be one of: #{valid_set}"]
       }
     },

     {
       :failed_attribute => ['ArchivesSpaceDynamicEnum'],
       :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
       :do => ->(msgs, message, path, invalid, valid_set) {
         # Record the attribute type too, so callers can special-case
         # dynamic enums (e.g. to offer adding the missing value).
         msgs[:attribute_types][fragment_join(path)] = 'ArchivesSpaceDynamicEnum'
         msgs[:errors][fragment_join(path)] = ["Invalid value '#{invalid}'. Must be one of: #{valid_set}"]
       }
     },

     {
       :failed_attribute => ['ArchivesSpaceReadOnlyDynamicEnum'],
       :pattern => /The property '#\/.*?' value "(.*?)" .*values: (.*) in schema/,
       :do => ->(msgs, message, path, invalid, valid_set) {
         msgs[:attribute_types][fragment_join(path)] = 'ArchivesSpaceReadOnlyDynamicEnum'
         msgs[:errors][fragment_join(path)] = ["Protected read-only list #{path}. Invalid value '#{invalid}'. Must be one of: #{valid_set}"]
       }
     },

     {
       :failed_attribute => ['Type', 'ArchivesSpaceType'],
       :pattern => /The property '#\/.*?' of type (.*?) did not match the following type: (.*?) in schema/,
       :do => ->(msgs, message, path, actual_type, desired_type) {
         if actual_type !~ /JSONModel/ || message[:failed_attribute] == 'ArchivesSpaceType'
           # We'll skip JSONModels because the specific problem with the
           # document will have already been listed separately.

           # Accumulate every acceptable type seen for this path across
           # messages, so repeated failures collapse into one "one of" error.
           msgs[:state][fragment_join(path)] ||= []
           msgs[:state][fragment_join(path)] << desired_type

           if msgs[:state][fragment_join(path)].length == 1
             msgs[:errors][fragment_join(path)] = ["Must be a #{desired_type} (you provided a #{actual_type})"]
             # a little better messages for malformed uri
             if desired_type =~ /uri$/
               msgs[:errors][fragment_join(path)].first << " (malformed or invalid uri? check if referenced object exists.)"
             end
           else
             msgs[:errors][fragment_join(path)] = ["Must be one of: #{msgs[:state][fragment_join(path)].join(", ")} (you provided a #{actual_type})"]
           end
         end
       }
     },

     {
       :failed_attribute => ['custom_validation'],
       :pattern => /Validation failed for '(.*?)': (.*?) in schema /,
       :do => ->(msgs, message, path, property, msg) {
         property = (property && !property.empty?) ? property : nil
         msgs[:errors][fragment_join(path, property)] = [msg]
       }
     },

     {
       :failed_attribute => ['custom_validation'],
       :pattern => /Warning generated for '(.*?)': (.*?) in schema /,
       :do => ->(msgs, message, path, property, msg) {
         msgs[:warnings][fragment_join(path, property)] = [msg]
       }
     },

     {
       :failed_attribute => ['custom_validation'],
       :pattern => /Validation error code: (.*?) in schema /,
       :do => ->(msgs, message, path, error_code) {
         msgs[:errors]['coded_errors'] = [error_code]
       }
     },


     # Catch all
     {
       :failed_attribute => nil,
       :pattern => /^(.*)$/,
       :do => ->(msgs, message, path, msg) {
         msgs[:errors]['unknown'] = [msg]
       }
     }
    ]


  # For a given error, find its list of sub errors.
  #
  # @param errors [Hash, Array] an error hash or list of them
  # @return [Array] a flat list of leaf errors (those without an :errors key)
  def self.extract_suberrors(errors)
    errors = [errors].flatten

    result = errors.map do |error|
      if !error[:errors]
        error
      else
        self.extract_suberrors(error[:errors])
      end
    end

    result.flatten
  end


  # Given a list of error messages produced by JSON schema validation, parse
  # them into a structured format like:
  #
  #   {
  #     :errors => {:attr1 => "(What was wrong with attr1)"},
  #     :warnings => {:attr2 => "(attr2 not quite right either)"}
  #   }
  #
  # @param messages [Array<Hash>] raw validator messages, each with
  #   :failed_attribute, :message and :fragment keys
  # @param validator [Object] currently unused; kept for interface compatibility
  # @return [Hash] with :errors, :warnings and :attribute_types keys
  def self.parse_schema_messages(messages, validator)
    messages = self.extract_suberrors(messages)

    msgs = {
      :errors => {},
      :warnings => {},
      # to lookup e.g., msgs[:attribute_types]['extents/0/extent_type'] => 'ArchivesSpaceDynamicEnum'
      :attribute_types => {},
      :state => {} # give the parse rules somewhere to store useful state for a run
    }

    messages.each do |message|
      # First matching rule wins; the catch-all at the end always matches.
      SCHEMA_PARSE_RULES.each do |rule|
        if (rule[:failed_attribute].nil? || rule[:failed_attribute].include?(message[:failed_attribute])) &&
            message[:message] =~ rule[:pattern]

          rule[:do].call(msgs, message, message[:fragment],
                         *message[:message].scan(rule[:pattern]).flatten)

          break
        end
      end
    end

    # Internal scratch space: not part of the public result.
    msgs.delete(:state)
    msgs
  end


  # Given a hash representing a record tree, map across the hash and this
  # model's schema in lockstep.
  #
  # Each proc in the 'transformations' array is called with the current node
  # in the record tree as its first argument, and the part of the schema
  # that corresponds to it.  Whatever the proc returns is used to replace
  # the node in the record tree.
  #
  # @param record [Hash] the record tree (non-hashes are returned unchanged)
  # @param schema [Hash, String] a schema hash or a JSONModel reference string
  # @param transformations [Array<Proc>] procs of (node, schema) -> node
  # @return [Object] the transformed record tree
  def self.map_hash_with_schema(record, schema, transformations = [])
    return record unless record.is_a?(Hash)

    if schema.is_a?(String)
      schema = resolve_schema_reference(schema)
    end

    # Sometimes a schema won't specify anything other than the required type
    # (like {'type' => 'object'}). If there's nothing more to check, we're
    # done.
    return record if !schema.has_key?("properties")


    # Apply transformations to the current level of the tree
    transformations.each do |transform|
      record = transform.call(record, schema)
    end

    # Now figure out how to traverse the remainder of the tree...
    result = {}

    record.each do |k, v|
      k = k.to_s
      properties = schema['properties']

      if properties.has_key?(k) && (properties[k]["type"] == "object")
        result[k] = self.map_hash_with_schema(v, properties[k], transformations)

      elsif v.is_a?(Array) && properties.has_key?(k) && (properties[k]["type"] == "array")

        # Arrays are tricky because they can either consist of a single type, or
        # a number of different types.

        if properties[k]["items"]["type"].is_a?(Array)
          result[k] = v.map {|elt|
            if elt.is_a?(Hash)
              next_schema = determine_schema_for(elt, properties[k]["items"]["type"])
              self.map_hash_with_schema(elt, next_schema, transformations)
            elsif elt.is_a?(Array)
              raise "Nested arrays aren't supported here (yet)"
            else
              elt
            end
          }

        # The array contains a single type of object
        elsif properties[k]["items"]["type"] == "object"
          result[k] = v.map {|elt| self.map_hash_with_schema(elt, properties[k]["items"], transformations)}
        else
          # Just one valid type
          result[k] = v.map {|elt| self.map_hash_with_schema(elt, properties[k]["items"]["type"], transformations)}
        end

      elsif (v.is_a?(Hash) || v.is_a?(Array)) && (properties.has_key?(k) && properties[k]["type"].is_a?(Array))
        # Multiple possible types for this single value

        results = (v.is_a?(Array) ? v : [v]).map {|elt|
          next_schema = determine_schema_for(elt, properties[k]["type"])
          self.map_hash_with_schema(elt, next_schema, transformations)
        }

        result[k] = v.is_a?(Array) ? results : results[0]

      elsif properties.has_key?(k) && JSONModel.parse_jsonmodel_ref(properties[k]["type"])
        # The type is a reference to another JSONModel schema: recurse into it.
        result[k] = self.map_hash_with_schema(v, properties[k]["type"], transformations)
      else
        result[k] = v
      end
    end

    result
  end

  # True for the values drop_empty_elements considers "empty":
  # nil, the empty string and the empty hash (but NOT 0 or false).
  def self.blank?(obj)
    obj.nil? || obj == '' || obj == {}
  end

  # Recursively walk a map and remove any empty strings, empty maps and nils.
  # Recursively collapses elements so that if, for example, an map becomes
  # empty after having its own empty elements removed, it gets removed as well.
  #
  # NOTE: mutates nested hashes/arrays in place and returns the (possibly
  # emptied) root object.
  def self.drop_empty_elements(obj)
    queue = [obj]
    to_visit = []

    # Breadth-first pass: collect every nested container.  Each dequeued node
    # is unshifted onto to_visit, so to_visit ends up in reverse BFS order —
    # leaves first, the root last.
    until queue.empty?
      obj = queue.shift

      if obj.is_a?(Hash) || obj.is_a?(Array)
        (obj.is_a?(Hash) ? obj.values : obj).each do |v|
          if v.is_a?(Hash) || v.is_a?(Array)
            queue.push(v)
          end
        end
      end

      to_visit.unshift(obj)
    end

    # Prune leaves before parents, so a container emptied by pruning is itself
    # seen as blank when its parent is processed.
    until to_visit.empty?
      obj = to_visit.shift

      if obj.is_a?(Array)
        obj.reject! {|elt| blank?(elt)}
      elsif obj.is_a?(Hash)
        obj.keys.each do |k|
          if blank?(obj[k])
            obj.delete(k)
          end
        end
      end
    end

    # The root is the last element shifted from to_visit.
    obj
  end

  # Drop any keys from 'hash' that aren't defined in the JSON schema.
  #
  # If drop_readonly is true, also drop any values where the schema has
  # 'readonly' set to true. These values are produced by the system for the
  # client, but are not part of the data model.
  #
  # @param hash [Hash] the record tree
  # @param schema [Hash, String] the schema (or JSONModel reference) to filter against
  # @param drop_readonly [Boolean] whether to also drop readonly properties
  # @return [Hash] a new tree containing only schema-defined keys
  def self.drop_unknown_properties(hash, schema, drop_readonly = false)
    fn = proc do |node, node_schema|
      result = {}

      node.each do |k, v|
        if node_schema["properties"].has_key?(k.to_s) && (!drop_readonly || !node_schema["properties"][k.to_s]["readonly"])
          result[k] = v
        end
      end

      result
    end

    hash = drop_empty_elements(hash)
    map_hash_with_schema(hash, schema, [fn])
  end


  # Fill in schema-declared defaults for any properties missing from 'hash';
  # missing array-typed properties default to [].
  #
  # @param hash [Hash] the record tree
  # @param schema [Hash, String] the schema (or JSONModel reference)
  # @return [Hash] a new tree with defaults applied at every level
  def self.apply_schema_defaults(hash, schema)
    fn = proc do |node, node_schema|
      result = node.clone

      node_schema["properties"].each do |property, definition|
        # A property counts as present whether keyed by string or symbol.
        if definition.has_key?("default") && !node.has_key?(property.to_s) && !node.has_key?(property.intern)
          result[property] = definition["default"]
        elsif definition['type'] == 'array' && !node.has_key?(property.to_s) && !node.has_key?(property.intern)
          # Array values that weren't provided default to empty
          result[property] = []
        end
      end

      result
    end

    map_hash_with_schema(hash, schema, [fn])
  end


  # NOTE: 'private' does not affect methods defined with 'def self.'; these
  # helpers are effectively public, but are treated as internal by convention.
  private

  # Resolve a "JSONModel(:type) ..." reference string to that model's schema.
  #
  # @raise [RuntimeError] if the reference doesn't parse
  def self.resolve_schema_reference(schema_reference)
    # This should be a reference to a different JSONModel type. Resolve it
    # and return its schema.
    ref = JSONModel.parse_jsonmodel_ref(schema_reference)
    raise "Invalid schema given: #{schema_reference}" if !ref

    JSONModel.JSONModel(ref[0]).schema
  end


  # Pick which of 'possible_schemas' applies to 'elt', matching on the
  # element's 'jsonmodel_type' property.
  #
  # @raise [JSONModel::ValidationException] if elt has no jsonmodel_type
  # @raise [RuntimeError] if no candidate schema matches
  def self.determine_schema_for(elt, possible_schemas)
    # A number of different types. Match them up based on the value of the 'jsonmodel_type' property
    schema_types = possible_schemas.map {|schema| schema.is_a?(Hash) ? schema["type"] : schema}

    jsonmodel_type = elt["jsonmodel_type"] || elt[:jsonmodel_type]

    if !jsonmodel_type
      raise JSONModel::ValidationException.new(:errors => {"record" => ["Can't unambiguously match #{elt.inspect} against schema types: #{schema_types.inspect}. " +
                                                                        "Resolve this by adding a 'jsonmodel_type' property to #{elt.inspect}"]})
    end

    next_schema = schema_types.find {|type|
      (type.is_a?(String) && type.include?("JSONModel(:#{jsonmodel_type})")) ||
        (type.is_a?(Hash) && type["jsonmodel_type"] == jsonmodel_type)
    }

    if next_schema.nil?
      raise "Couldn't determine type of '#{elt.inspect}'. Must be one of: #{schema_types.inspect}"
    end

    next_schema
  end


end