/ test / resources / tiny_shakespeare_tokenized / dataset_info.json
dataset_info.json
 1  {
 2    "builder_name": "text",
 3    "citation": "",
 4    "config_name": "default",
 5    "dataset_name": "text",
 6    "dataset_size": 1235394,
 7    "description": "",
 8    "download_checksums": {
 9      "/home/jobuser/Liger-Kernel/test/resources/scripts/./../../resources/tiny_shakespeare.txt": {
10        "num_bytes": 1115393,
11        "checksum": null
12      }
13    },
14    "download_size": 1115393,
15    "features": {
16      "input_ids": {
17        "feature": {
18          "dtype": "int32",
19          "_type": "Value"
20        },
21        "_type": "Sequence"
22      },
23      "attention_mask": {
24        "feature": {
25          "dtype": "int8",
26          "_type": "Value"
27        },
28        "_type": "Sequence"
29      }
30    },
31    "homepage": "",
32    "license": "",
33    "size_in_bytes": 2350787,
34    "splits": {
35      "train": {
36        "name": "train",
37        "num_bytes": 1235394,
38        "num_examples": 40000,
39        "dataset_name": "text"
40      }
41    },
42    "version": {
43      "version_str": "0.0.0",
44      "major": 0,
45      "minor": 0,
46      "patch": 0
47    }
48  }