test_proto_json_utils.py
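"""Tests for ``mlflow.utils.proto_json_utils``: proto <-> JSON round-trips via
``message_to_json``/``parse_dict``, TF-serving request parsing with
``parse_tf_serving_input``, and DataFrame JSON (de)serialization helpers."""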
import base64
import datetime
import json

import numpy as np
import pandas as pd
import pytest
from google.protobuf.text_format import Parse as ParseTextIntoProto

from mlflow.entities import Experiment, Metric
from mlflow.entities.model_registry import ModelVersion, RegisteredModel
from mlflow.exceptions import MlflowException
from mlflow.protos.model_registry_pb2 import RegisteredModel as ProtoRegisteredModel
from mlflow.protos.service_pb2 import Experiment as ProtoExperiment
from mlflow.protos.service_pb2 import Metric as ProtoMetric
from mlflow.types import ColSpec, DataType, Schema, TensorSpec
from mlflow.types.schema import Array, Map, Object, Property
from mlflow.types.utils import _infer_schema
from mlflow.utils.proto_json_utils import (
    MlflowFailedTypeConversion,
    _CustomJsonEncoder,
    cast_df_types_according_to_schema,
    dataframe_from_parsed_json,
    dataframe_from_raw_json,
    message_to_json,
    parse_dict,
    parse_tf_serving_input,
)

from tests.protos.test_message_pb2 import SampleMessage


def test_message_to_json():
    json_out = message_to_json(Experiment("123", "name", "arty", "active").to_proto())
    assert json.loads(json_out) == {
        "experiment_id": "123",
        "name": "name",
        "artifact_location": "arty",
        "lifecycle_stage": "active",
    }

    original_proto_message = RegisteredModel(
        name="model_1",
        creation_timestamp=111,
        last_updated_timestamp=222,
        description="Test model",
        latest_versions=[
            ModelVersion(
                name="mv-1",
                version="1",
                creation_timestamp=333,
                last_updated_timestamp=444,
                description="v 1",
                user_id="u1",
                current_stage="Production",
                source="A/B",
                run_id="9245c6ce1e2d475b82af84b0d36b52f4",
                status="READY",
                status_message=None,
            ),
            ModelVersion(
                name="mv-2",
                version="2",
                creation_timestamp=555,
                last_updated_timestamp=666,
                description="v 2",
                user_id="u2",
                current_stage="Staging",
                source="A/C",
                run_id="123",
                status="READY",
                status_message=None,
            ),
        ],
    ).to_proto()
    json_out = message_to_json(original_proto_message)
    json_dict = json.loads(json_out)
    assert json_dict == {
        "name": "model_1",
        "creation_timestamp": 111,
        "last_updated_timestamp": 222,
        "description": "Test model",
        "latest_versions": [
            {
                "name": "mv-1",
                "version": "1",
                "creation_timestamp": 333,
                "last_updated_timestamp": 444,
                "current_stage": "Production",
                "description": "v 1",
                "user_id": "u1",
                "source": "A/B",
                "run_id": "9245c6ce1e2d475b82af84b0d36b52f4",
                "status": "READY",
            },
            {
                "name": "mv-2",
                "version": "2",
                "creation_timestamp": 555,
                "last_updated_timestamp": 666,
                "current_stage": "Staging",
                "description": "v 2",
                "user_id": "u2",
                "source": "A/C",
                "run_id": "123",
                "status": "READY",
            },
        ],
    }
    new_proto_message = ProtoRegisteredModel()
    parse_dict(json_dict, new_proto_message)
    assert original_proto_message == new_proto_message

    test_message = ParseTextIntoProto(
        """
        field_int32: 11
        field_int64: 12
        field_uint32: 13
        field_uint64: 14
        field_sint32: 15
        field_sint64: 16
        field_fixed32: 17
        field_fixed64: 18
        field_sfixed32: 19
        field_sfixed64: 20
        field_bool: true
        field_string: "Im a string"
        field_with_default1: 111
        field_repeated_int64: [1, 2, 3]
        field_enum: ENUM_VALUE1
        field_inner_message {
          field_inner_int64: 101
          field_inner_repeated_int64: [102, 103]
        }
        field_inner_message {
          field_inner_int64: 104
          field_inner_repeated_int64: [105, 106]
        }
        oneof1: 207
        [mlflow.ExtensionMessage.field_extended_int64]: 100
        field_map1: [{key: 51 value: "52"}, {key: 53 value: "54"}]
        field_map2: [{key: "61" value: 62}, {key: "63" value: 64}]
        field_map3: [{key: 561 value: 562}, {key: 563 value: 564}]
        field_map4: [{key: 71
                      value: {field_inner_int64: 72
                              field_inner_repeated_int64: [81, 82]
                              field_inner_string: "str1"}},
                     {key: 73
                      value: {field_inner_int64: 74
                              field_inner_repeated_int64: 83
                              field_inner_string: "str2"}}]
        """,
        SampleMessage(),
    )
    json_out = message_to_json(test_message)
    json_dict = json.loads(json_out)
    assert json_dict == {
        "field_int32": 11,
        "field_int64": 12,
        "field_uint32": 13,
        "field_uint64": 14,
        "field_sint32": 15,
        "field_sint64": 16,
        "field_fixed32": 17,
        "field_fixed64": 18,
        "field_sfixed32": 19,
        "field_sfixed64": 20,
        "field_bool": True,
        "field_string": "Im a string",
        "field_with_default1": 111,
        "field_repeated_int64": [1, 2, 3],
        "field_enum": "ENUM_VALUE1",
        "field_inner_message": [
            {"field_inner_int64": 101, "field_inner_repeated_int64": [102, 103]},
            {"field_inner_int64": 104, "field_inner_repeated_int64": [105, 106]},
        ],
        "oneof1": 207,
        # JSON doesn't support non-string keys, so the int keys will be converted to strings.
        "field_map1": {"51": "52", "53": "54"},
        "field_map2": {"63": 64, "61": 62},
        "field_map3": {"561": 562, "563": 564},
        "field_map4": {
            "73": {
                "field_inner_int64": 74,
                "field_inner_repeated_int64": [83],
                "field_inner_string": "str2",
            },
            "71": {
                "field_inner_int64": 72,
                "field_inner_repeated_int64": [81, 82],
                "field_inner_string": "str1",
            },
        },
        "[mlflow.ExtensionMessage.field_extended_int64]": "100",
    }
    new_test_message = SampleMessage()
    parse_dict(json_dict, new_test_message)
    assert new_test_message == test_message


def test_parse_dict():
    # Unknown fields in the JSON payload are ignored rather than raising an error.
    in_json = {"experiment_id": "123", "name": "name", "unknown": "field"}
    message = ProtoExperiment()
    parse_dict(in_json, message)
    experiment = Experiment.from_proto(message)
    assert experiment.experiment_id == "123"
    assert experiment.name == "name"
    assert experiment.artifact_location == ""


def test_parse_dict_int_as_string_backcompat():
    # int64 values serialized as JSON strings are parsed back into integers.
    in_json = {"timestamp": "123"}
    message = ProtoMetric()
    parse_dict(in_json, message)
    metric = Metric.from_proto(message)
    assert metric.timestamp == 123


def assert_result(result, expected_result):
    assert result.keys() == expected_result.keys()
    for key in result:
        assert (result[key] == expected_result[key]).all()
        assert result[key].dtype == expected_result[key].dtype


def test_parse_tf_serving_dictionary():
    # instances are correctly aggregated to dict of input name -> tensor
    tfserving_input = {
        "instances": [
            {"a": "s1", "b": 1.1, "c": [1, 2, 3]},
            {"a": "s2", "b": 2.2, "c": [4, 5, 6]},
            {"a": "s3", "b": 3.3, "c": [7, 8, 9]},
        ]
    }
    # Without Schema
    result = parse_tf_serving_input(tfserving_input)
    expected_result_no_schema = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3]),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    }
    assert_result(result, expected_result_no_schema)

    # With Schema
    schema = Schema([
        TensorSpec(np.dtype("str"), [-1], "a"),
        TensorSpec(np.dtype("float32"), [-1], "b"),
        TensorSpec(np.dtype("int32"), [-1], "c"),
    ])
    df_schema = Schema([ColSpec("string", "a"), ColSpec("float", "b"), ColSpec("integer", "c")])
    result = parse_tf_serving_input(tfserving_input, schema)
    expected_result_schema = {
        "a": np.array(["s1", "s2", "s3"], dtype=np.dtype("str")),
        "b": np.array([1.1, 2.2, 3.3], dtype="float32"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int32"),
    }
    assert_result(result, expected_result_schema)
    # With df Schema
    result = parse_tf_serving_input(tfserving_input, df_schema)
    assert_result(result, expected_result_schema)
    # With df Schema containing array
    new_schema = _infer_schema(tfserving_input["instances"])
    result = parse_tf_serving_input(tfserving_input, new_schema)
    expected_result = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3], dtype="float64"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int64"),
    }
    assert_result(result, expected_result)

    # input provided as a dict
    tfserving_input = {
        "inputs": {
            "a": ["s1", "s2", "s3"],
            "b": [1.1, 2.2, 3.3],
            "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        }
    }
    # Without Schema
    result = parse_tf_serving_input(tfserving_input)
    assert_result(result, expected_result_no_schema)

    # With Schema
    result = parse_tf_serving_input(tfserving_input, schema)
    assert_result(result, expected_result_schema)

    # With df Schema
    result = parse_tf_serving_input(tfserving_input, df_schema)
    assert_result(result, expected_result_schema)


def test_parse_tf_serving_arbitrary_input_dictionary():
    # input provided as a columnar dict with an arbitrary shape for each input, specifically a
    # different 0th dimension.
    tfserving_input_arbitrary = {
        "inputs": {
            "a": [["s1", "s2", "s3"], ["s4", "s5", "s6"]],  # [2, 3]
            "b": [1.1, 2.2, 3.3],  # [3,]
            "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]],  # [5, 3]
        }
    }

    schema = Schema([
        TensorSpec(np.dtype("str"), [-1, 3], "a"),
        TensorSpec(np.dtype("float32"), [-1], "b"),
        TensorSpec(np.dtype("int32"), [-1, 4], "c"),
    ])
    df_schema = Schema([ColSpec("string", "a"), ColSpec("float", "b"), ColSpec("integer", "c")])

    expected_result_no_schema_arbitrary = {
        "a": np.array([["s1", "s2", "s3"], ["s4", "s5", "s6"]]),
        "b": np.array([1.1, 2.2, 3.3]),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]),
    }
    expected_result_schema_arbitrary = {
        "a": np.array([["s1", "s2", "s3"], ["s4", "s5", "s6"]], dtype=np.dtype("str")),
        "b": np.array([1.1, 2.2, 3.3], dtype="float32"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype="int32"),
    }

    # Without Schema
    result = parse_tf_serving_input(tfserving_input_arbitrary)
    assert_result(result, expected_result_no_schema_arbitrary)

    # With Schema
    result = parse_tf_serving_input(tfserving_input_arbitrary, schema)
    assert_result(result, expected_result_schema_arbitrary)

    # With df Schema
    result = parse_tf_serving_input(tfserving_input_arbitrary, df_schema)
    assert_result(result, expected_result_schema_arbitrary)


def test_parse_tf_serving_single_array():
    def assert_result(result, expected_result):
        assert (result == expected_result).all()

    # values for each column are properly converted to a tensor
    arr = [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[3, 2, 1], [6, 5, 4], [9, 8, 7]],
    ]
    tfserving_instances = {"instances": arr}
    tfserving_inputs = {"inputs": arr}

    # Without schema
    instance_result = parse_tf_serving_input(tfserving_instances)
    assert instance_result.shape == (2, 3, 3)
    assert_result(instance_result, np.array(arr, dtype="int64"))

    input_result = parse_tf_serving_input(tfserving_inputs)
    assert input_result.shape == (2, 3, 3)
    assert_result(input_result, np.array(arr, dtype="int64"))

    # Unnamed schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1])])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert_result(instance_result, np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert_result(input_result, np.array(arr, dtype="float32"))

    # Named schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1], "a")])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert isinstance(instance_result, dict)
    assert len(instance_result.keys()) == 1
    assert "a" in instance_result
    assert_result(instance_result["a"], np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert isinstance(input_result, dict)
    assert len(input_result.keys()) == 1
    assert "a" in input_result
    assert_result(input_result["a"], np.array(arr, dtype="float32"))


def test_parse_tf_serving_raises_expected_errors():
    # input is bad if a column value is missing for a row/instance
    tfserving_instances = {
        "instances": [
            {"a": "s1", "b": 1},
            {"a": "s2", "b": 2, "c": [4, 5, 6]},
            {"a": "s3", "b": 3, "c": [7, 8, 9]},
        ]
    }
    with pytest.raises(
        MlflowException, match="The length of values for each input/column name are not the same"
    ):
        parse_tf_serving_input(tfserving_instances)

    # cannot specify both "instances" and "inputs"
    tfserving_input = {
        "instances": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        "inputs": {"a": ["s1", "s2", "s3"], "b": [1, 2, 3], "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
    }
    match = 'Invalid input. One of "instances" and "inputs" must be specified'
    with pytest.raises(MlflowException, match=match):
        parse_tf_serving_input(tfserving_input)

    # cannot specify signature name
    tfserving_input = {
        "signature_name": "hello",
        "inputs": {"a": ["s1", "s2", "s3"], "b": [1, 2, 3], "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
    }
    match = '"signature_name" parameter is currently not supported'
    with pytest.raises(MlflowException, match=match):
        parse_tf_serving_input(tfserving_input)


def test_dataframe_from_json():
    source = pd.DataFrame(
        {
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "float": np.array([1.2, 2.3, 3.4], dtype=np.float32),
            "double": np.array([1.2, 2.3, 3.4], dtype=np.float64),
            "integer": np.array([3, 4, 5], dtype=np.int32),
            "long": np.array([3, 4, 5], dtype=np.int64),
            "binary": [bytes([1, 2, 3]), bytes([4, 5]), bytes([6])],
            "date_string": ["2018-02-03", "1996-03-02", "2021-03-05"],
        },
        columns=[
            "boolean",
            "string",
            "float",
            "double",
            "integer",
            "long",
            "binary",
            "date_string",
        ],
    )

    jsonable_df = pd.DataFrame(source, copy=True)
    jsonable_df["binary"] = jsonable_df["binary"].map(base64.b64encode)
    schema = Schema([
        ColSpec("boolean", "boolean"),
        ColSpec("string", "string"),
        ColSpec("float", "float"),
        ColSpec("double", "double"),
        ColSpec("integer", "integer"),
        ColSpec("long", "long"),
        ColSpec("binary", "binary"),
        ColSpec("string", "date_string"),
    ])
    parsed = dataframe_from_raw_json(
        jsonable_df.to_json(orient="split"), pandas_orient="split", schema=schema
    )
    pd.testing.assert_frame_equal(parsed, source)
    parsed = dataframe_from_raw_json(
        jsonable_df.to_json(orient="records"), pandas_orient="records", schema=schema
    )
    pd.testing.assert_frame_equal(parsed, source)
    # try parsing with tensor schema
    tensor_schema = Schema([
        TensorSpec(np.dtype("bool"), [-1], "boolean"),
        TensorSpec(np.dtype("str"), [-1], "string"),
        TensorSpec(np.dtype("float32"), [-1], "float"),
        TensorSpec(np.dtype("float64"), [-1], "double"),
        TensorSpec(np.dtype("int32"), [-1], "integer"),
        TensorSpec(np.dtype("int64"), [-1], "long"),
        TensorSpec(np.dtype(bytes), [-1], "binary"),
    ])
    parsed = dataframe_from_raw_json(
        jsonable_df.to_json(orient="split"), pandas_orient="split", schema=tensor_schema
    )

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    pd.testing.assert_frame_equal(parsed, jsonable_df)
    parsed = dataframe_from_raw_json(
        jsonable_df.to_json(orient="records"), pandas_orient="records", schema=tensor_schema
    )

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    pd.testing.assert_frame_equal(parsed, jsonable_df)

    # Test parse with TensorSchema with a single tensor
    tensor_schema = Schema([TensorSpec(np.dtype("float32"), [-1, 3])])
    source = pd.DataFrame(
        {
            "a": np.array([1, 2, 3], dtype=np.float32),
            "b": np.array([4.1, 5.2, 6.3], dtype=np.float32),
            "c": np.array([7, 8, 9], dtype=np.float32),
        },
        columns=["a", "b", "c"],
    )
    pd.testing.assert_frame_equal(
        source,
        dataframe_from_raw_json(
            source.to_json(orient="split"), pandas_orient="split", schema=tensor_schema
        ),
    )
    pd.testing.assert_frame_equal(
        source,
        dataframe_from_raw_json(
            source.to_json(orient="records"), pandas_orient="records", schema=tensor_schema
        ),
    )

    schema = Schema([ColSpec("datetime", "datetime")])
    parsed = dataframe_from_raw_json(
        """
        [
            {"datetime": "2022-01-01T00:00:00"},
            {"datetime": "2022-01-02T03:04:05"}
        ]
        """,
        pandas_orient="records",
        schema=schema,
    )
    expected = pd.DataFrame(
        {
            "datetime": pd.to_datetime([
                "2022-01-01T00:00:00",
                "2022-01-02T03:04:05",
            ])
        },
    )
    pd.testing.assert_frame_equal(parsed, expected)


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (datetime.datetime(2022, 1, 1), '"2022-01-01T00:00:00"'),
        (datetime.datetime(2022, 1, 2, 3, 4, 5), '"2022-01-02T03:04:05"'),
        (datetime.date(2022, 1, 1), '"2022-01-01"'),
        (datetime.time(0, 0, 0), '"00:00:00"'),
        (pd.Timestamp(2022, 1, 1), '"2022-01-01T00:00:00"'),
    ],
)
def test_datetime_encoder(dt, expected):
    assert json.dumps(dt, cls=_CustomJsonEncoder) == expected


@pytest.mark.parametrize(
    ("dataframe", "schema", "expected"),
    [
        (
            pd.DataFrame(columns=["foo"], data=[1, 2, 3]),
            Schema([TensorSpec(np.dtype("float64"), [-1], "foo")]),
            np.dtype("float64"),
        ),
        (
            pd.DataFrame(columns=["foo"], data=[[[1, 2, 3]], [[4, 5, 6]]]),
            Schema([TensorSpec(np.dtype("float64"), [-1, 1], "foo")]),
            np.dtype("object"),
        ),
        (
            pd.DataFrame(index=[1, 2, 3], columns=["foo"], data=[1, 2, 3]),
            Schema([TensorSpec(np.dtype("float64"), [-1], "foo")]),
            np.dtype("float64"),
        ),
        (
            pd.DataFrame(columns=["foo"], data=[1, 2, 3]),
            Schema([ColSpec("double", "foo")]),
            np.dtype("float64"),
        ),
    ],
)
def test_cast_df_types_according_to_schema_success(dataframe, schema, expected):
    casted_pdf = cast_df_types_according_to_schema(dataframe, schema)
    assert casted_pdf["foo"].dtype == expected


@pytest.mark.parametrize(
    ("dataframe", "schema", "error_message"),
    [
        (
            pd.DataFrame(columns=["foo"], data=[1, 2, 3]),
            Schema([ColSpec("binary", "foo")]),
            r"TypeError\('encoding without a string argument'\)",
        ),
        (
            pd.DataFrame(columns=["foo"], data=["a", "b", "c"]),
            Schema([ColSpec("double", "foo")]),
            r'ValueError\("could not convert string to float: \'a\'"\)',
        ),
    ],
)
def test_cast_df_types_according_to_schema_error_message(dataframe, schema, error_message):
    with pytest.raises(MlflowFailedTypeConversion, match=error_message):
        cast_df_types_according_to_schema(dataframe, schema)


@pytest.mark.parametrize(
    ("data", "schema", "instances_data"),
    [
        ({"query": "sentence"}, Schema([ColSpec(DataType.string, name="query")]), None),
        (
            {"query": ["sentence_1", "sentence_2"]},
            Schema([ColSpec(Array(DataType.string), name="query")]),
            None,
        ),
        (
            {"query": ["sentence_1", "sentence_2"], "table": "some_table"},
            Schema([
                ColSpec(Array(DataType.string), name="query"),
                ColSpec(DataType.string, name="table"),
            ]),
            None,
        ),
        (
            {"query": [{"name": "value", "age": 10}, {"name": "value"}], "table": ["some_table"]},
            Schema([
                ColSpec(
                    Array(
                        Object([
                            Property("name", DataType.string),
                            Property("age", DataType.long, required=False),
                        ])
                    ),
                    name="query",
                ),
                ColSpec(Array(DataType.string), name="table"),
            ]),
            None,
        ),
        (
            [{"query": "sentence"}, {"query": "sentence"}],
            Schema([ColSpec(DataType.string, name="query")]),
            {"query": ["sentence", "sentence"]},
        ),
        (
            [
                {"query": ["sentence_1", "sentence_2"], "table": "some_table"},
                {"query": ["sentence_1", "sentence_2"]},
            ],
            Schema([
                ColSpec(Array(DataType.string), name="query"),
                ColSpec(DataType.string, name="table", required=False),
            ]),
            {
                "query": [["sentence_1", "sentence_2"], ["sentence_1", "sentence_2"]],
                "table": ["some_table"],
            },
        ),
        (
            [
                {"query": {"a": "sentence_1", "b": "sentence_2"}, "table": "some_table"},
                {"query": {"a": "sentence_1"}, "table": "some_table"},
            ],
            Schema([
                ColSpec(
                    Object([
                        Property("a", DataType.string),
                        Property("b", DataType.string, required=False),
                    ]),
                    name="query",
                ),
                ColSpec(DataType.string, name="table"),
            ]),
            {
                "query": [{"a": "sentence_1", "b": "sentence_2"}, {"a": "sentence_1"}],
                "table": ["some_table", "some_table"],
            },
        ),
        (
            {
                "query": [{"name": "value", "age": "10"}, {"name": "value"}],
                "table": {"k": "some_table"},
                "data": {"k1": ["a", "b"], "k2": ["c"]},
            },
            Schema([
                ColSpec(
                    Array(Map(value_type=DataType.string)),
                    name="query",
                ),
                ColSpec(Map(value_type=DataType.string), name="table"),
                ColSpec(Map(value_type=Array(DataType.string)), name="data"),
            ]),
            None,
        ),
    ],
)
def test_parse_tf_serving_input_for_dictionaries_and_lists_and_maps(data, schema, instances_data):
    np.testing.assert_equal(parse_tf_serving_input({"inputs": data}, schema), data)
    if instances_data is None:
        np.testing.assert_equal(parse_tf_serving_input({"instances": data}, schema), data)
    else:
        np.testing.assert_equal(parse_tf_serving_input({"instances": data}, schema), instances_data)
    df = pd.DataFrame(data) if isinstance(data, list) else pd.DataFrame([data])
    df_split = df.to_dict(orient="split")
    pd.testing.assert_frame_equal(dataframe_from_parsed_json(df_split, "split", schema), df)
    df_records = df.to_dict(orient="records")
    pd.testing.assert_frame_equal(dataframe_from_parsed_json(df_records, "records", schema), df)