Qwen2_5_VL_7B_Vision_Model.ipynb
1 { 2 "cells": [ 3 { 4 "cell_type": "markdown", 5 "id": "a627af7f", 6 "metadata": { 7 "id": "a627af7f" 8 }, 9 "source": [ 10 "# Qwen2.5-VL (7B) Vision Model" 11 ] 12 }, 13 { 14 "cell_type": "markdown", 15 "id": "6ca9476a", 16 "metadata": { 17 "id": "6ca9476a" 18 }, 19 "source": [ 20 "## Description\n", 21 "This notebook demonstrates how to use the **Qwen2.5-VL (7B) Vision Model** vision-language model for image input and text generation tasks." 22 ] 23 }, 24 { 25 "cell_type": "markdown", 26 "source": [ 27 "[](https://colab.research.google.com/github/DhivyaBharathy-web/PraisonAI/blob/main/examples/cookbooks/Qwen2_5_VL_7B_Vision_Model.ipynb)" 28 ], 29 "metadata": { 30 "id": "0G9UGDLJk8uV" 31 }, 32 "id": "0G9UGDLJk8uV" 33 }, 34 { 35 "cell_type": "markdown", 36 "id": "f467ebfe", 37 "metadata": { 38 "id": "f467ebfe" 39 }, 40 "source": [ 41 "## Dependencies\n", 42 "```python\n", 43 "!pip install transformers accelerate torch torchvision\n", 44 "```" 45 ] 46 }, 47 { 48 "cell_type": "markdown", 49 "id": "ea76265d", 50 "metadata": { 51 "id": "ea76265d" 52 }, 53 "source": [ 54 "## Tools\n", 55 "- 🤗 Transformers\n", 56 "- PyTorch\n", 57 "- Vision Model APIs" 58 ] 59 }, 60 { 61 "cell_type": "markdown", 62 "id": "37c57a37", 63 "metadata": { 64 "id": "37c57a37" 65 }, 66 "source": [ 67 "## YAML Prompt\n", 68 "```yaml\n", 69 "mode: vision\n", 70 "model: Qwen2.5-VL (7B) Vision Model\n", 71 "tasks:\n", 72 " - image captioning\n", 73 " - visual question answering\n", 74 "```" 75 ] 76 }, 77 { 78 "cell_type": "markdown", 79 "id": "7b02bc60", 80 "metadata": { 81 "id": "7b02bc60" 82 }, 83 "source": [ 84 "## Main\n", 85 "Below is the simplified main execution to load the model and run basic inference." 86 ] 87 }, 88 { 89 "cell_type": "code", 90 "execution_count": null, 91 "id": "6c397c49", 92 "metadata": { 93 "id": "6c397c49" 94 }, 95 "outputs": [], 96 "source": [ 97 "%%capture\n", 98 "import os\n", 99 "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", 100 " !pip install unsloth\n", 101 "else:\n", 102 " # Do this only in Colab notebooks! 
Otherwise use pip install unsloth\n", 103 " !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo\n", 104 " !pip install sentencepiece protobuf \"datasets>=3.4.1\" huggingface_hub hf_transfer\n", 105 " !pip install --no-deps unsloth" 106 ] 107 }, 108 { 109 "cell_type": "code", 110 "execution_count": null, 111 "id": "18b3d67c", 112 "metadata": { 113 "colab": { 114 "base_uri": "https://localhost:8080/", 115 "height": 513, 116 "referenced_widgets": [ 117 "73fc3ac47de840cda7a07297153404d5", 118 "8c8b54f4d34a4b59a5a2513bbc082297", 119 "7bd2abffca3f4193afffd3f1a7c8ff8a", 120 "9ecdc7e0e9c247c59e9eab93455153b8", 121 "84f4e9f7ff6f433bbf13078e43ebbe2a", 122 "7475f8a404624b1b844d8148fabb493b", 123 "9d4aae9ebe594bd6875a5963eb6d77ed", 124 "62b6d78abada429393c499e0479eb56e", 125 "2aecce21a3a04971b4f5c9fd0b059738", 126 "2af66136423c41c6908c5a61c6fa2e5d", 127 "8bf92078d48448848402ede49b1b5182", 128 "78ae8aea29e04420bcfe616061934d98", 129 "e22b1706b6be49af8e31d2c512ccaaf6", 130 "71ac48ba667b4af291df3a33e7643c6c", 131 "7d16f0e5aaec4429a5b8d3c4878ea74a", 132 "a21de38381c241309bc798be604c3bf5", 133 "f26f1de0f3f34010ac0d37d6f3ed74b6", 134 "0af61827920e415daf64cfcb1685ee63", 135 "77a82cf2255b43e088812087af4d8ead", 136 "fad668efab014d28877087f38f3c034e", 137 "e764888263e54f0ba2f603992d678096", 138 "2dccbbbdfbf645b8b5896953f6e8a59b", 139 "45385885a26e42d0b1f52eda382fd6f4", 140 "ccb21e1b7fc14a6ab10689154ab2b94a", 141 "f5c7ab8d65c4418a943a51a401720f67", 142 "f6aaa74bce474abcbc9daf0515b6ffbd", 143 "76bd63e567e94650a82272ca6fd85011", 144 "d3cac12f194a40a187bbba68da36173c", 145 "30439a7bf22f416cbc01cfe87e433e1f", 146 "3265b3490b11426a8aa44d541981a296", 147 "78a31b91439c4c99a07f8f8d8dfb548f", 148 "d2a3a751ac2f491c9e7cb7a4092ab0b6", 149 "b652bb727c1148c4aac9ed751920e3ae", 150 "14e519d5e0834fd9a70a111140f8429a", 151 "41f3770e6d864c1794a1fa93feaf8605", 152 "6aa1670b808b42db894756b904243947", 153 "5757f1d68b8f4571a05486d846127b14", 154 "14cbf1b61170470bbb9d63e31d0ca310", 155 "3324ce12dc3e45fab9984eb83a92eb39", 156 "0ccabea3b39d4b148d420e26b53d3990", 157 "2d65a3b83b524eeaafde8e406e90198f", 158 "d179ef65b2b842d69548302c2f7a31be", 159 "2362fd437a134c30b147df947ee97128", 160 "7f22e31fc7174ff2bfa8bc2624d33dc7", 161 "643db314d8e94feeb9cd8747909eaabe", 162 "b80bb8f2cc92400aa34934978f9bc865", 163 "03f0c63c572249aabb7fb61272af1f1b", 164 "4113f045f9bc440b8dfaed5dfab14c1b", 165 "86a231a75d4a4586a53e4c7e74c63550", 166 "573653d0298246868e5d93069ca0fef5", 167 "c20a42f12c4545dfafdba22267e44d4b", 168 "4b74060c0bb44a35b847e6752b93e463", 169 "7e211233455342d48bd60f6d499b91f5", 170 "520ca077c6854e6fa1e019c0b209eae7", 171 "8f1fce549e494998b13f523bd793d300", 172 "bbb5e6a0a8b64d10802fbf3e89778306", 173 "0cef8128eeba4638bd268f9a1a364427", 174 "399cc918cec24db7b99af9e5f3c5adb4", 175 "3a611a2e00be4da6833172d928802fb9", 176 "0f2f303ae91349e29132055105fcbdec", 177 "17c08a96e04244a995e658eeb5a69699", 178 "9269dee0b0104b22b4043d3661f79350", 179 "a405cbc6d42d4f8f8be7c05f4e34128c", 180 "e702e948bb34481d995c75be223c4e16", 181 "c9949e2b99d14fa0bc4cfc88f42c2afb", 182 "48bbf9b662af47c5b99ebe24a6b70549", 183 "c1af10a57c0a40cf818d0974facdcc36", 184 "e4c1ff933c6e4ef49afbedffcdd99d5c", 185 "3caf3a6d48da403099bcdee89bda6949", 186 "7e0bbe2417e0420db1502390ba7c6c2d", 187 "5522658c84dc47a491f5deb08353b631", 188 "aecb331a01a44c9e92c74840faaf2616", 189 "caadc7751ff847b4a831dd6b2b53e60b", 190 "f0dd55c8f71a4f47a91a7018ab89d092", 191 "7a2d1ba8441e4a368a505c38d2c811ad", 192 "6657bbba902e43429224ed9c426389fe", 
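Loading a 7B model in 4-bit still requires a CUDA GPU with roughly 6 GB of free memory, so it is worth confirming one is visible before the weights download. A minimal check with plain PyTorch (not part of the original notebook):

```python
import torch

# Fail early if no GPU is attached (e.g. a CPU-only Colab runtime).
assert torch.cuda.is_available(), "No CUDA GPU detected - switch to a GPU runtime."
free, total = torch.cuda.mem_get_info()
print(torch.cuda.get_device_name(0))
print(f"Free / total GPU memory: {free / 1e9:.1f} / {total / 1e9:.1f} GB")
```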
193 "a25ba73fb99448a3b46ac3571e2db0be", 194 "c2f518986b724620bbc67f7b6ede6d96", 195 "ed214eea73f54928af276883ddd53b5e", 196 "b241468a1d684fbb820baff6b9f9e362", 197 "0f7da532813445f1be01977cdec364ab", 198 "de7ac823a59d488995aa37bfbcec2e5b", 199 "fc5321766b8742ca97c4a839ba30d421", 200 "c4256f50e38e481083f3d366bfd91d3f", 201 "f7e4e90fb3fe4bbb85adb039404dd8e2", 202 "e0f8b5f130064bd69f4f3298e9536b5a", 203 "20956dab8f3e4f22830b4d9e56a5232a", 204 "4efe313f923c4ae585fb8a4353cecd97", 205 "3af59a3e9ac2430da201262fc54b5998", 206 "ad746cc40cdb461bba8cf04033d4afc5", 207 "3d09770f22854ca38213d4a64ace6df3", 208 "910df16ec7494eb68fb1f8ce95046414", 209 "080c7eed9cc0475980e85a530c91e616", 210 "e4fe961d149b461fb1d168d226c8aa1d", 211 "2a588c12d3814de495fa865a63629ba8", 212 "607a235508e54fdda0aef75311c7a213", 213 "383e14f14b0d4328a80e6b0283c421fa", 214 "913324d3476a4d04887d200f91afc553", 215 "4c0c6892c4f347deb4a85544664ff1e1", 216 "a863bb17b5984981bf9bd4ff93e438f4", 217 "b28286deba7c4152a8d2c21ea0d19402", 218 "fb9e17975b2349de81115173af6ac17a", 219 "f3b068de4fa6466e88970b73c868de0a", 220 "2127bee2928e43578b8b51e3c1fdfabf", 221 "a97f3d8c7bdf4292ae8ed133f059f635", 222 "67053bf18cd04347a0f2d2ff6134ae88", 223 "80e56300aee14b4386832aa1618c5d43", 224 "d096e33830754ea5b2e8f102548b805a", 225 "a913ea3370404f5f972754715889aced", 226 "35db2f7869f44aeab422409d27d5f7ec" 227 ] 228 }, 229 "id": "18b3d67c", 230 "outputId": "f0d7f72f-8166-439b-96da-ed7e30f8bdb3" 231 }, 232 "outputs": [ 233 { 234 "name": "stdout", 235 "output_type": "stream", 236 "text": [ 237 "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", 238 "🦥 Unsloth Zoo will now patch everything to make training faster!\n", 239 "==((====))== Unsloth 2025.3.19: Fast Qwen2_5_Vl patching. Transformers: 4.50.0.\n", 240 " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", 241 "O^O/ \\_/ \\ Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0\n", 242 "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]\n", 243 " \"-____-\" Free license: http://github.com/unslothai/unsloth\n", 244 "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" 245 ] 246 }, 247 { 248 "data": { 249 "application/vnd.jupyter.widget-view+json": { 250 "model_id": "73fc3ac47de840cda7a07297153404d5", 251 "version_major": 2, 252 "version_minor": 0 253 }, 254 "text/plain": [ 255 "model.safetensors: 0%| | 0.00/5.97G [00:00<?, ?B/s]" 256 ] 257 }, 258 "metadata": {}, 259 "output_type": "display_data" 260 }, 261 { 262 "data": { 263 "application/vnd.jupyter.widget-view+json": { 264 "model_id": "78ae8aea29e04420bcfe616061934d98", 265 "version_major": 2, 266 "version_minor": 0 267 }, 268 "text/plain": [ 269 "generation_config.json: 0%| | 0.00/267 [00:00<?, ?B/s]" 270 ] 271 }, 272 "metadata": {}, 273 "output_type": "display_data" 274 }, 275 { 276 "data": { 277 "application/vnd.jupyter.widget-view+json": { 278 "model_id": "45385885a26e42d0b1f52eda382fd6f4", 279 "version_major": 2, 280 "version_minor": 0 281 }, 282 "text/plain": [ 283 "preprocessor_config.json: 0%| | 0.00/575 [00:00<?, ?B/s]" 284 ] 285 }, 286 "metadata": {}, 287 "output_type": "display_data" 288 }, 289 { 290 "name": "stderr", 291 "output_type": "stream", 292 "text": [ 293 "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. 
You'll still be able to use a slow processor with `use_fast=False`.\n" 294 ] 295 }, 296 { 297 "data": { 298 "application/vnd.jupyter.widget-view+json": { 299 "model_id": "14e519d5e0834fd9a70a111140f8429a", 300 "version_major": 2, 301 "version_minor": 0 302 }, 303 "text/plain": [ 304 "tokenizer_config.json: 0%| | 0.00/7.33k [00:00<?, ?B/s]" 305 ] 306 }, 307 "metadata": {}, 308 "output_type": "display_data" 309 }, 310 { 311 "data": { 312 "application/vnd.jupyter.widget-view+json": { 313 "model_id": "643db314d8e94feeb9cd8747909eaabe", 314 "version_major": 2, 315 "version_minor": 0 316 }, 317 "text/plain": [ 318 "vocab.json: 0%| | 0.00/2.78M [00:00<?, ?B/s]" 319 ] 320 }, 321 "metadata": {}, 322 "output_type": "display_data" 323 }, 324 { 325 "data": { 326 "application/vnd.jupyter.widget-view+json": { 327 "model_id": "bbb5e6a0a8b64d10802fbf3e89778306", 328 "version_major": 2, 329 "version_minor": 0 330 }, 331 "text/plain": [ 332 "merges.txt: 0%| | 0.00/1.67M [00:00<?, ?B/s]" 333 ] 334 }, 335 "metadata": {}, 336 "output_type": "display_data" 337 }, 338 { 339 "data": { 340 "application/vnd.jupyter.widget-view+json": { 341 "model_id": "c1af10a57c0a40cf818d0974facdcc36", 342 "version_major": 2, 343 "version_minor": 0 344 }, 345 "text/plain": [ 346 "tokenizer.json: 0%| | 0.00/11.4M [00:00<?, ?B/s]" 347 ] 348 }, 349 "metadata": {}, 350 "output_type": "display_data" 351 }, 352 { 353 "data": { 354 "application/vnd.jupyter.widget-view+json": { 355 "model_id": "c2f518986b724620bbc67f7b6ede6d96", 356 "version_major": 2, 357 "version_minor": 0 358 }, 359 "text/plain": [ 360 "added_tokens.json: 0%| | 0.00/605 [00:00<?, ?B/s]" 361 ] 362 }, 363 "metadata": {}, 364 "output_type": "display_data" 365 }, 366 { 367 "data": { 368 "application/vnd.jupyter.widget-view+json": { 369 "model_id": "3af59a3e9ac2430da201262fc54b5998", 370 "version_major": 2, 371 "version_minor": 0 372 }, 373 "text/plain": [ 374 "special_tokens_map.json: 0%| | 0.00/614 [00:00<?, ?B/s]" 375 ] 376 }, 377 "metadata": {}, 378 "output_type": "display_data" 379 }, 380 { 381 "data": { 382 "application/vnd.jupyter.widget-view+json": { 383 "model_id": "a863bb17b5984981bf9bd4ff93e438f4", 384 "version_major": 2, 385 "version_minor": 0 386 }, 387 "text/plain": [ 388 "chat_template.json: 0%| | 0.00/1.05k [00:00<?, ?B/s]" 389 ] 390 }, 391 "metadata": {}, 392 "output_type": "display_data" 393 } 394 ], 395 "source": [ 396 "from unsloth import FastVisionModel # FastLanguageModel for LLMs\n", 397 "import torch\n", 398 "\n", 399 "# 4bit pre quantized models we support for 4x faster downloading + no OOMs.\n", 400 "fourbit_models = [\n", 401 " \"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit\", # Llama 3.2 vision support\n", 402 " \"unsloth/Llama-3.2-11B-Vision-bnb-4bit\",\n", 403 " \"unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit\", # Can fit in a 80GB card!\n", 404 " \"unsloth/Llama-3.2-90B-Vision-bnb-4bit\",\n", 405 "\n", 406 " \"unsloth/Pixtral-12B-2409-bnb-4bit\", # Pixtral fits in 16GB!\n", 407 " \"unsloth/Pixtral-12B-Base-2409-bnb-4bit\", # Pixtral base model\n", 408 "\n", 409 " \"unsloth/Qwen2-VL-2B-Instruct-bnb-4bit\", # Qwen2 VL support\n", 410 " \"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit\",\n", 411 " \"unsloth/Qwen2-VL-72B-Instruct-bnb-4bit\",\n", 412 "\n", 413 " \"unsloth/llava-v1.6-mistral-7b-hf-bnb-4bit\", # Any Llava variant works!\n", 414 " \"unsloth/llava-1.5-7b-hf-bnb-4bit\",\n", 415 "] # More models at https://huggingface.co/unsloth\n", 416 "\n", 417 "model, tokenizer = FastVisionModel.from_pretrained(\n", 418 " 
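A quick sanity check that 4-bit quantization actually kept the footprint small (illustrative, not in the original notebook; in fp16 the 7B weights alone would need roughly 15 GB):

```python
# The 4-bit weights should occupy on the order of 6 GB on the GPU.
print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
```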
\"unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit\",\n", 419 " load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.\n", 420 " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for long context\n", 421 ")" 422 ] 423 }, 424 { 425 "cell_type": "code", 426 "execution_count": null, 427 "id": "45f1a7d8", 428 "metadata": { 429 "id": "45f1a7d8" 430 }, 431 "outputs": [], 432 "source": [ 433 "model = FastVisionModel.get_peft_model(\n", 434 " model,\n", 435 " finetune_vision_layers = True, # False if not finetuning vision layers\n", 436 " finetune_language_layers = True, # False if not finetuning language layers\n", 437 " finetune_attention_modules = True, # False if not finetuning attention layers\n", 438 " finetune_mlp_modules = True, # False if not finetuning MLP layers\n", 439 "\n", 440 " r = 16, # The larger, the higher the accuracy, but might overfit\n", 441 " lora_alpha = 16, # Recommended alpha == r at least\n", 442 " lora_dropout = 0,\n", 443 " bias = \"none\",\n", 444 " random_state = 3407,\n", 445 " use_rslora = False, # We support rank stabilized LoRA\n", 446 " loftq_config = None, # And LoftQ\n", 447 " # target_modules = \"all-linear\", # Optional now! Can specify a list if needed\n", 448 ")" 449 ] 450 }, 451 { 452 "cell_type": "code", 453 "execution_count": null, 454 "id": "915566b6", 455 "metadata": { 456 "colab": { 457 "base_uri": "https://localhost:8080/", 458 "height": 177, 459 "referenced_widgets": [ 460 "5e4f3f4910d1477b924362c095a0cedd", 461 "b5a01de096fa4e2daf66e235bd5686bb", 462 "e9195f3a3e29475f941c302fe2a88a83", 463 "0d6f6fb66522436786728d8602e2f466", 464 "94b72e08764c486d8eed43a4557f1d98", 465 "8e8281a56a3d4879b6b376077890c920", 466 "630894e18b264f7dae8eddb17304a1cd", 467 "697fc92abb824cf4a9bc78709640ba54", 468 "eb6d552d6c1d4e82a521b29b36ae3db0", 469 "7572f2d06114442ba94fc22761a55346", 470 "c6dcd15e155949cbb9698e43e55b7a90", 471 "7365368b63d9425db5d4e5514e6e0586", 472 "5997909fc34645ce9a03d82ab0932766", 473 "ea4ebb90b708493c85fa81fecf4d60c0", 474 "227724312eb045a58616f63c3a5b7407", 475 "4f58e8a2ddbb4a73a2421ea37c68fea9", 476 "7e89e5546e394162b7b0f793478527ff", 477 "db9ea237050045d8861d76bb3b995753", 478 "3a484309bbe148abb2ef05452850728a", 479 "5e601d00c0094d999a5b37b9555c111c", 480 "e7bd6c674af541d08c0acb5e47579156", 481 "927147d620b0426cb46b49b6d8dc2ffc", 482 "4ebb36e479884847bcef5736e276dfb9", 483 "7b5a059a8ef84817851d109fd7e3a7b5", 484 "0e949ae97d544d65af09cebe63041b8e", 485 "c817a779a9ff4a848391a7c7dff1e4ca", 486 "abae462b98c84006b71d4a62c8a47d5a", 487 "dc1a755aae6c44849d818fd96c046649", 488 "2650d2ce196e404a98cbfac96d0a9bef", 489 "36d579fdc2fb4c59b827f5bf7a92abd4", 490 "fccec1c2a389401eb614f5f1cfb15f0c", 491 "7b4d303966274c71b50312df12e7b87a", 492 "ce4bb2dffcf84ae9bf1f8ac86b2d768e", 493 "639b886f72e24951909e4ffb6cfa247d", 494 "7b13cf1727a04f5ca6de1f7eb536d800", 495 "631ce764f4684225a3c1e4455717a555", 496 "48af93dada914d8b88d5ffffd24ac666", 497 "4bd339c9d3ee43f39678d2c18baeef53", 498 "b1b12b8def994e748b08b27abcef4b87", 499 "bc38022049ce47628d8c215959c38b91", 500 "331d5d2bc67f44f283fc82481cb60411", 501 "0a5ad3bd338b4e6ea94a11a199d2f5ba", 502 "5c41f695158641f485349c20a1405cfd", 503 "89bc17f1911a43cca7196da232500fd6", 504 "f5629e8c82c848218f69810e1b21b0e4", 505 "5244452af0734f3183a7384435f9e1fe", 506 "69f645decfb64baca11cfbdb9773a458", 507 "d1788fabe3e34f6eb60fecab3c7490ba", 508 "8f423066ba8f421495f934a269053cc1", 509 "569d1435402e4533b0427a4b191b6754", 510 "b834a7ab12544f9b8305f5728677e921", 511 "e72ac7c9b7824c81a6e77e6fadad9f9c", 512 
"828ad6e5fcfb44b3b4559ec5682d095d", 513 "dac611678f4949ae98051bdc975e083f", 514 "0a21bae13a3c46509db70dbe8e9de5eb" 515 ] 516 }, 517 "id": "915566b6", 518 "outputId": "ece64855-55d4-4011-c5b8-33a9bb622734" 519 }, 520 "outputs": [ 521 { 522 "data": { 523 "application/vnd.jupyter.widget-view+json": { 524 "model_id": "5e4f3f4910d1477b924362c095a0cedd", 525 "version_major": 2, 526 "version_minor": 0 527 }, 528 "text/plain": [ 529 "README.md: 0%| | 0.00/519 [00:00<?, ?B/s]" 530 ] 531 }, 532 "metadata": {}, 533 "output_type": "display_data" 534 }, 535 { 536 "data": { 537 "application/vnd.jupyter.widget-view+json": { 538 "model_id": "7365368b63d9425db5d4e5514e6e0586", 539 "version_major": 2, 540 "version_minor": 0 541 }, 542 "text/plain": [ 543 "train-00000-of-00001.parquet: 0%| | 0.00/344M [00:00<?, ?B/s]" 544 ] 545 }, 546 "metadata": {}, 547 "output_type": "display_data" 548 }, 549 { 550 "data": { 551 "application/vnd.jupyter.widget-view+json": { 552 "model_id": "4ebb36e479884847bcef5736e276dfb9", 553 "version_major": 2, 554 "version_minor": 0 555 }, 556 "text/plain": [ 557 "test-00000-of-00001.parquet: 0%| | 0.00/38.2M [00:00<?, ?B/s]" 558 ] 559 }, 560 "metadata": {}, 561 "output_type": "display_data" 562 }, 563 { 564 "data": { 565 "application/vnd.jupyter.widget-view+json": { 566 "model_id": "639b886f72e24951909e4ffb6cfa247d", 567 "version_major": 2, 568 "version_minor": 0 569 }, 570 "text/plain": [ 571 "Generating train split: 0%| | 0/68686 [00:00<?, ? examples/s]" 572 ] 573 }, 574 "metadata": {}, 575 "output_type": "display_data" 576 }, 577 { 578 "data": { 579 "application/vnd.jupyter.widget-view+json": { 580 "model_id": "f5629e8c82c848218f69810e1b21b0e4", 581 "version_major": 2, 582 "version_minor": 0 583 }, 584 "text/plain": [ 585 "Generating test split: 0%| | 0/7632 [00:00<?, ? examples/s]" 586 ] 587 }, 588 "metadata": {}, 589 "output_type": "display_data" 590 } 591 ], 592 "source": [ 593 "from datasets import load_dataset\n", 594 "dataset = load_dataset(\"unsloth/LaTeX_OCR\", split = \"train\")" 595 ] 596 }, 597 { 598 "cell_type": "markdown", 599 "id": "6193435d", 600 "metadata": { 601 "id": "6193435d" 602 }, 603 "source": [ 604 "## Output\n", 605 "This shows a basic output example of the vision-language model." 606 ] 607 } 608 ], 609 "metadata": { 610 "colab": { 611 "provenance": [] 612 } 613 }, 614 "nbformat": 4, 615 "nbformat_minor": 5 616 }