<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <!-- Page-specific title: file path, repository name, then site name. -->
    <title>tests/test-tokenizer-random.py · llama.cpp · Cradicle Explorer</title>
    <link href="/css/bootstrap/bootstrap.min.css" rel="stylesheet">
    <!-- Placeholder colour (at full opacity) for inputs using .form-control-dark. -->
    <style>
      .form-control-dark::placeholder {
        color: #aaa;
        opacity: 1;
      }
    </style>
    <link rel="stylesheet" href="/assets/fontawesome/css/all.min.css">
    <link rel="icon" type="image/png" href="/favicon.png">
    <!-- Linked after the Bootstrap stylesheet; keep this order so the cascade is unchanged. -->
    <link href="/css/dashboard.css" rel="stylesheet">
  </head>
                <body>
                <!-- Top bar: brand link, sidebar toggle (small screens), repo search, current repository. -->
                <header class="navbar navbar-dark sticky-top bg-dark flex-md-nowrap p-0 shadow">
                  <a class="navbar-brand col-md-3 col-lg-2 me-0 px-3 fs-6" href="/">Cradicle Explorer</a>
                  <button class="navbar-toggler position-absolute d-md-none collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#sidebarMenu" aria-controls="sidebarMenu" aria-expanded="false" aria-label="Toggle navigation">
                    <span class="navbar-toggler-icon"></span>
                  </button>
                  <!-- Repository search: submits the "q" parameter to /cgi-bin/main via GET. -->
                  <form class="w-100" method="get" action="/cgi-bin/main" role="search">
                    <input class="form-control form-control-dark w-100 rounded-0 border-0" type="search" name="q" placeholder="Search repos" aria-label="Search repos">
                  </form>
                  <div class="navbar-nav flex-row">
                    <div class="nav-item text-nowrap">
                      <a class="nav-link px-3 active" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh">llama.cpp</a>
                    </div>
                  </div>
                </header>
                <div class="container-fluid">
                  <div class="row">
                    <nav id="sidebarMenu" class="col-md-3 col-lg-2 d-md-block bg-dark sidebar collapse" aria-label="Repository">
                      <div class="position-sticky pt-3 sidebar-sticky">
                        <!-- Repository sections. Icons are decorative, so they are hidden from assistive tech. -->
                        <ul class="nav flex-column">
                          <li class="nav-item">
                            <a class="nav-link" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh">
                              <i class="align-text-bottom fa-solid fa-info" aria-hidden="true"></i>
                              Info
                            </a>
                          </li>
                          <li class="nav-item">
                            <a class="nav-link" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;issue=list">
                              <i class="align-text-bottom fa-solid fa-layer-group" aria-hidden="true"></i>
                              Issues
                            </a>
                          </li>
                          <li class="nav-item">
                            <a class="nav-link" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;patch=list">
                              <i class="align-text-bottom fa-solid fa-vest-patches" aria-hidden="true"></i>
                              Patches
                            </a>
                          </li>
                          <li class="nav-item">
                            <a class="nav-link" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;wallet=list">
                              <i class="align-text-bottom fa-solid fa-wallet" aria-hidden="true"></i>
                              Wallets
                            </a>
                          </li>
                          <li class="nav-item">
                            <a class="nav-link active" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=.">
                              <i class="align-text-bottom fa-solid fa-code" aria-hidden="true"></i>
                              Source
                            </a>
                          </li>
                        </ul>
                        <!-- The heading sits between the two lists: a <ul> may only contain <li> children. -->
                        <h6 class="sidebar-heading d-flex justify-content-between align-items-center px-3 mt-4 mb-1 text-muted text-uppercase">
                          <span>Source</span>
                        </h6>
                        <!-- File tree: directories and files of the repository. -->
                        <ul class="nav flex-column mb-2">
    <!-- Source tree entries, one per line. padding-left encodes depth (16px = root, 32px = inside tests/). -->
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=.devops"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> .devops</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=.github"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> .github</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=ci"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> ci</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=cmake"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> cmake</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=common"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> common</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=docs"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> docs</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=examples"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> examples</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=ggml-cuda"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> ggml-cuda</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=ggml-sycl"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> ggml-sycl</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=gguf-py"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> gguf-py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=grammars"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> grammars</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=kompute-shaders"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> kompute-shaders</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=media"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> media</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=models"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> models</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=pocs"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> pocs</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=prompts"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> prompts</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=requirements"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> requirements</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=scripts"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> scripts</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=spm-headers"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> spm-headers</a></li>
    <!-- Expanded directory: its files follow at the deeper indent. -->
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=tests"><i class="fa-solid fa-folder-open" style="color:#f0c040;" aria-hidden="true"></i> tests</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2F.gitignore"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .gitignore</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2FCMakeLists.txt"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> CMakeLists.txt</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Fget-model.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> get-model.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Fget-model.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> get-model.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Frun-json-schema-to-grammar.mjs"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> run-json-schema-to-grammar.mjs</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-autorelease.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-autorelease.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-backend-ops.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-backend-ops.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-c.c"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-c.c</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-chat-template.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-chat-template.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-double-float.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-double-float.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-grad0.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-grad0.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-grammar-integration.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-grammar-integration.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-grammar-parser.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-grammar-parser.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-json-schema-to-grammar.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-json-schema-to-grammar.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-llama-grammar.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-llama-grammar.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-model-load-cancel.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-model-load-cancel.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-opt.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-opt.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-quantize-fns.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-quantize-fns.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-quantize-perf.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-quantize-perf.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-rope.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-rope.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-sampling.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-sampling.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-0.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-0.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-0.py"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-0.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-0.sh"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-0.sh</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-1-bpe.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-1-bpe.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-1-spm.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-1-spm.cpp</a></li>
    <!-- The file currently being displayed. -->
    <li><a class="nav-link py-0 active" style="padding-left:32px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=tests%2Ftest-tokenizer-random.py" aria-current="page"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> test-tokenizer-random.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=vulkan-shaders"><i class="fa-solid fa-folder" style="color:#f0c040;" aria-hidden="true"></i> vulkan-shaders</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.clang-tidy"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .clang-tidy</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.dockerignore"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .dockerignore</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.ecrc"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .ecrc</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.editorconfig"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .editorconfig</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.flake8"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .flake8</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.gitignore"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .gitignore</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.gitmodules"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .gitmodules</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=.pre-commit-config.yaml"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> .pre-commit-config.yaml</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=AUTHORS"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> AUTHORS</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=CMakeLists.txt"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> CMakeLists.txt</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=CMakePresets.json"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> CMakePresets.json</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=CONTRIBUTING.md"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> CONTRIBUTING.md</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=LICENSE"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> LICENSE</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=Makefile"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> Makefile</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=Package.swift"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> Package.swift</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=README-sycl.md"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> README-sycl.md</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=README.md"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> README.md</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=SECURITY.md"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> SECURITY.md</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=codecov.yml"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> codecov.yml</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=convert-hf-to-gguf-update.py"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> convert-hf-to-gguf-update.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=convert-hf-to-gguf.py"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> convert-hf-to-gguf.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=convert-llama-ggml-to-gguf.py"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> convert-llama-ggml-to-gguf.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=flake.lock"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> flake.lock</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=flake.nix"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> flake.nix</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-alloc.c"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-alloc.c</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-alloc.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-alloc.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-backend-impl.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-backend-impl.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-backend.c"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-backend.c</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-backend.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-backend.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-blas.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-blas.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-blas.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-blas.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-common.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-common.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-cuda.cu"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-cuda.cu</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-cuda.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-cuda.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-impl.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-impl.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-kompute.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-kompute.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-kompute.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-kompute.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-metal.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-metal.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-metal.m"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-metal.m</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-metal.metal"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-metal.metal</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-quants.c"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-quants.c</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-quants.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-quants.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-rpc.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-rpc.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-rpc.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-rpc.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-sycl.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-sycl.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-sycl.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-sycl.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-vulkan-shaders.hpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-vulkan-shaders.hpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-vulkan.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-vulkan.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml-vulkan.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml-vulkan.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml.c"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml.c</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=ggml_vk_generate_shaders.py"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> ggml_vk_generate_shaders.py</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=kompute"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> kompute</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=llama.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> llama.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=llama.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> llama.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=mypy.ini"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> mypy.ini</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=pyrightconfig.json"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> pyrightconfig.json</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=requirements.txt"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> requirements.txt</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=sgemm.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> sgemm.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=sgemm.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> sgemm.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=unicode-data.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> unicode-data.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=unicode-data.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> unicode-data.h</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=unicode.cpp"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> unicode.cpp</a></li>
    <li><a class="nav-link py-0" style="padding-left:16px;" href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;file=unicode.h"><i class="fa-solid fa-file" style="color:#888;" aria-hidden="true"></i> unicode.h</a></li>
    
                        </ul>
                      </div>
                    </nav>
                <main class="col-md-9 ms-sm-auto col-lg-10">
                  <div class="container px-1 py-3">
        
<!-- Breadcrumb: repository root / directory / current file (the current file is not a link). -->
<nav class="mb-2" style="font-size:1.1rem;" aria-label="Breadcrumb"><a href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=.">/</a> <a href="/cgi-bin/repo?id=z6ysXz6ubEakbjB6aJfE7AXLxGqh&amp;source=tests">tests</a> / <span aria-current="page">test-tokenizer-random.py</span></nav>
        <div class="list-group">
        <div class="list-group-item">
        <div class="mb-2" style="font-weight:bold;"><i class="fa-solid fa-file"></i> test-tokenizer-random.py</div>
        <pre style="margin:0; font-size:0.85rem; overflow-x:auto; color:#fafafa;"><span style="color:#666; user-select:none;">  1</span>  # Test libllama tokenizer == AutoTokenizer.
<span style="color:#666; user-select:none;">  2</span>  # Brute force random words/text generation.
<span style="color:#666; user-select:none;">  3</span>  #
<span style="color:#666; user-select:none;">  4</span>  # Sample usage:
<span style="color:#666; user-select:none;">  5</span>  #
<span style="color:#666; user-select:none;">  6</span>  #   python3 tests/test-tokenizer-random.py ./models/ggml-vocab-llama-bpe.gguf ./models/tokenizers/llama-bpe
<span style="color:#666; user-select:none;">  7</span>  #
<span style="color:#666; user-select:none;">  8</span>  
<span style="color:#666; user-select:none;">  9</span>  import time
<span style="color:#666; user-select:none;"> 10</span>  import logging
<span style="color:#666; user-select:none;"> 11</span>  import argparse
<span style="color:#666; user-select:none;"> 12</span>  import subprocess
<span style="color:#666; user-select:none;"> 13</span>  import random
<span style="color:#666; user-select:none;"> 14</span>  
<span style="color:#666; user-select:none;"> 15</span>  from typing import Callable, Iterator
<span style="color:#666; user-select:none;"> 16</span>  
<span style="color:#666; user-select:none;"> 17</span>  import cffi
<span style="color:#666; user-select:none;"> 18</span>  from transformers import AutoTokenizer
<span style="color:#666; user-select:none;"> 19</span>  
<span style="color:#666; user-select:none;"> 20</span>  logger = logging.getLogger(&quot;test-tokenizer-random-bpe&quot;)
<span style="color:#666; user-select:none;"> 21</span>  
<span style="color:#666; user-select:none;"> 22</span>  
<span style="color:#666; user-select:none;"> 23</span>  class LibLlama:
<span style="color:#666; user-select:none;"> 24</span>  
<span style="color:#666; user-select:none;"> 25</span>      DEFAULT_PATH_LLAMA_H = &quot;./llama.h&quot;
<span style="color:#666; user-select:none;"> 26</span>      DEFAULT_PATH_LIBLLAMA = &quot;./build/libllama.so&quot;  # CMakeLists.txt: BUILD_SHARED_LIBS ON
<span style="color:#666; user-select:none;"> 27</span>  
<span style="color:#666; user-select:none;"> 28</span>      def __init__(self, path_llama_h: str = None, path_libllama: str = None):
<span style="color:#666; user-select:none;"> 29</span>          path_llama_h = path_llama_h or self.DEFAULT_PATH_LLAMA_H
<span style="color:#666; user-select:none;"> 30</span>          path_libllama = path_libllama or self.DEFAULT_PATH_LIBLLAMA
<span style="color:#666; user-select:none;"> 31</span>          (self.ffi, self.lib) = self._load_libllama_cffi(path_llama_h, path_libllama)
<span style="color:#666; user-select:none;"> 32</span>          self.lib.llama_backend_init()
<span style="color:#666; user-select:none;"> 33</span>  
<span style="color:#666; user-select:none;"> 34</span>      def _load_libllama_cffi(self, path_llama_h: str, path_libllama: str):
<span style="color:#666; user-select:none;"> 35</span>          cmd = [&quot;gcc&quot;, &quot;-E&quot;, &quot;-P&quot;, &quot;-D__restrict=&quot;, &quot;-D__attribute__(x)=&quot;, &quot;-D__asm__(x)=&quot;, path_llama_h]
<span style="color:#666; user-select:none;"> 36</span>          res = subprocess.run(cmd, stdout=subprocess.PIPE)
<span style="color:#666; user-select:none;"> 37</span>          assert (res.returncode == 0)
<span style="color:#666; user-select:none;"> 38</span>          source = res.stdout.decode()
<span style="color:#666; user-select:none;"> 39</span>          ffi = cffi.FFI()
<span style="color:#666; user-select:none;"> 40</span>          if True:  # workarounds for pycparser
<span style="color:#666; user-select:none;"> 41</span>              source = &quot;typedef struct { } __builtin_va_list;&quot; + &quot;\n&quot; + source
<span style="color:#666; user-select:none;"> 42</span>              source = source.replace(&quot;sizeof (int)&quot;,    str(ffi.sizeof(&quot;int&quot;)))
<span style="color:#666; user-select:none;"> 43</span>              source = source.replace(&quot;sizeof (void *)&quot;, str(ffi.sizeof(&quot;void*&quot;)))
<span style="color:#666; user-select:none;"> 44</span>              source = source.replace(&quot;sizeof (size_t)&quot;, str(ffi.sizeof(&quot;size_t&quot;)))
<span style="color:#666; user-select:none;"> 45</span>              source = source.replace(&quot;sizeof(int32_t)&quot;, str(ffi.sizeof(&quot;int32_t&quot;)))
<span style="color:#666; user-select:none;"> 46</span>          ffi.cdef(source, override=True)
<span style="color:#666; user-select:none;"> 47</span>          lib = ffi.dlopen(path_libllama)
<span style="color:#666; user-select:none;"> 48</span>          return (ffi, lib)
<span style="color:#666; user-select:none;"> 49</span>  
<span style="color:#666; user-select:none;"> 50</span>      def model_default_params(self, **kwargs):
<span style="color:#666; user-select:none;"> 51</span>          mparams = self.lib.llama_model_default_params()
<span style="color:#666; user-select:none;"> 52</span>          for k, v in kwargs.items():
<span style="color:#666; user-select:none;"> 53</span>              setattr(mparams, k, v)
<span style="color:#666; user-select:none;"> 54</span>          return mparams
<span style="color:#666; user-select:none;"> 55</span>  
<span style="color:#666; user-select:none;"> 56</span>      def context_default_params(self, **kwargs):
<span style="color:#666; user-select:none;"> 57</span>          cparams = self.lib.llama_context_default_params()
<span style="color:#666; user-select:none;"> 58</span>          for k, v in kwargs.items():
<span style="color:#666; user-select:none;"> 59</span>              setattr(cparams, k, v)
<span style="color:#666; user-select:none;"> 60</span>          return cparams
<span style="color:#666; user-select:none;"> 61</span>  
<span style="color:#666; user-select:none;"> 62</span>  
class LibLlamaModel:
    """A model plus context loaded through libllama, used here for tokenization.

    Holds the native model and context handles and a reusable token buffer
    sized from ``llama_n_ctx``. Call :meth:`free` when done.
    """

    def __init__(self, libllama: "LibLlama", path_model: str, mparams=None, cparams=None):
        """Load the model file and create a context for it.

        Args:
            libllama: Loaded library wrapper providing ``lib``, ``ffi`` and the
                ``*_default_params`` helpers.
            path_model: Path of the model file to load.
            mparams: Model parameters. ``None`` or a dict of overrides is turned
                into a params object via ``model_default_params``; any other
                value is passed to libllama as is.
            cparams: Context parameters, handled like ``mparams`` but through
                ``context_default_params``.

        Raises:
            RuntimeError: If the model cannot be loaded or the context cannot
                be created. In the latter case the model is freed first.
        """
        self.lib = libllama.lib
        self.ffi = libllama.ffi
        # Defined up front so free() is safe on a partially constructed object.
        self.model = None
        self.ctx = None
        if mparams is None or isinstance(mparams, dict):
            mparams = libllama.model_default_params(**(mparams or {}))
        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
        if not self.model:
            raise RuntimeError("error: failed to load model '%s'" % path_model)
        if cparams is None or isinstance(cparams, dict):
            cparams = libllama.context_default_params(**(cparams or {}))
        self.ctx = self.lib.llama_new_context_with_model(self.model, cparams)
        if not self.ctx:
            # The caller never receives this object, so release the model here
            # instead of leaking it.
            self.lib.llama_free_model(self.model)
            self.model = None
            raise RuntimeError("error: failed to create context for model '%s'" % path_model)
        n_tokens_max = self.lib.llama_n_ctx(self.ctx)
        self.token_ids = self.ffi.new("llama_token[]", n_tokens_max)

    def free(self):
        """Free the context and the model if present, then drop all handles."""
        if self.ctx:
            self.lib.llama_free(self.ctx)
        if self.model:
            self.lib.llama_free_model(self.model)
        self.ctx = None
        self.model = None
        self.lib = None

    def tokenize(self, text: str, n_tokens_max: int = 0, add_special: bool = False, parse_special: bool = False) -> list[int]:
        """Tokenize ``text`` with libllama and return the token ids.

        Args:
            text: Text to tokenize; it is encoded as UTF-8 before the call.
            n_tokens_max: Token capacity passed to ``llama_tokenize``. Values
                <= 0 select the current buffer length; larger values grow the
                buffer first.
            add_special: Forwarded to ``llama_tokenize``.
            parse_special: Forwarded to ``llama_tokenize``.

        Returns:
            The token ids, or ``[]`` when ``llama_tokenize`` returns a negative
            count.
        """
        if n_tokens_max <= 0:
            n_tokens_max = len(self.token_ids)
        elif n_tokens_max > len(self.token_ids):
            # Never advertise more capacity than the buffer really has: the
            # native side writes up to n_tokens_max entries into it.
            self.token_ids = self.ffi.new("llama_token[]", n_tokens_max)
        text = text.encode("utf-8")
        num = self.lib.llama_tokenize(self.model, text, len(text), self.token_ids, n_tokens_max, add_special, parse_special)
        # NOTE(review): a negative result presumably signals that the buffer
        # was too small - confirm against llama.h.
        if num < 0:
            return []
        return list(self.token_ids[0:num])
<span style="color:#666; user-select:none;"> 97</span>  
<span style="color:#666; user-select:none;"> 98</span>  
def generator_custom_text() -> Iterator[str]:
    """Yield the fixed set of general-purpose sample texts, in listed order."""
    samples = (
        "",
        " ",
        "  ",
        "   ",
        "\t",
        "\n",
        "\n\n",
        "\n\n\n",
        "\t\n",
        "Hello world",
        " Hello world",
        "Hello World",
        " Hello World",
        " Hello World!",
        "Hello, world!",
        " Hello, world!",
        " this is 🦙.cpp",
        "w048 7tuijk dsdfhu",
        "нещо на Български",
        "កាន់តែពិសេសអាចខលចេញ",
        "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
        "Hello",
        " Hello",
        "  Hello",
        "   Hello",
        "    Hello",
        "    Hello\n    Hello",
        " (",
        "\n =",
        "' era",
        "Hello, y'all! How are you 😁 ?我想在apple工作1314151天～",
        "3",
        "33",
        "333",
        "3333",
        "33333",
        "333333",
        "3333333",
        "33333333",
        "333333333",
    )
    for sample in samples:
        yield sample
<span style="color:#666; user-select:none;">143</span>  
<span style="color:#666; user-select:none;">144</span>  
def generator_custom_text_edge_cases() -> Iterator[str]:
    """Yield the regression inputs collected while debugging, in listed order."""
    cases = (
        '\x1f-a',     # unicode_ranges_control, {0x00001C, 0x00001F}
        '¼-a',        # unicode_ranges_digit, 0x00BC
        '½-a',        # unicode_ranges_digit, 0x00BD
        '¾-a',        # unicode_ranges_digit, 0x00BE
        'a 〇b',      # unicode_ranges_digit, 0x3007
        'Ⅵ-a',       # unicode_ranges_digit, {0x00002150, 0x0000218F} // Number Forms
        '\uFEFF//',   # unicode_ranges_control, 0xFEFF (BOM)
        'Cửa Việt',   # llama-3, ignore_merges = true
        '<s>a',       # Phi-3 fail
        '<unk><|endoftext|><s>',  # Phi-3 fail
        'a\na',       # TODO: Bert fail
        'a </s> b',   # rstrip phi-3
        'a <mask> b', # lstrip jina-v2
    )
    for case in cases:
        yield case
<span style="color:#666; user-select:none;">162</span>  
<span style="color:#666; user-select:none;">163</span>  
def generator_vocab_words(vocab: list[str]) -> Iterator[str]:
    """Yield every vocabulary entry unchanged, in the order given."""
    for word in vocab:
        yield word
<span style="color:#666; user-select:none;">167</span>  
<span style="color:#666; user-select:none;">168</span>  
def generator_added_lr_strip(tokenizer) -> Iterator[str]:
    """Yield each special/added token with every left/right whitespace padding.

    For every padded form, four variants are produced: bare, prefixed with
    "a", suffixed with "z", and both.
    """
    paddings = ["", " ", "  ", "    "]
    token_set = set(tokenizer.all_special_tokens)
    token_set.update(tokenizer.added_tokens_encoder)
    for token in sorted(token_set):
        for left in paddings:
            for right in paddings:
                padded = left + token + right
                yield padded
                yield "a" + padded
                yield padded + "z"
                yield "a" + padded + "z"
<span style="color:#666; user-select:none;">181</span>  
<span style="color:#666; user-select:none;">182</span>  
def generator_random_added_tokens(tokenizer, iterations=100) -> Iterator[str]:
    """Yield random concatenations of special tokens, added tokens and separators.

    One text of 500 randomly chosen pieces is produced per iteration; the
    iteration index is the RNG seed, so the output is reproducible.
    """
    separators = [" ", "\n", "\t", "-", "!", "one", "1", "<s>", "</s>"]
    candidates = set(tokenizer.all_special_tokens)
    candidates.update(tokenizer.added_tokens_encoder)
    candidates.update(separators)
    pool = sorted(candidates)
    rng = random.Random()
    for seed in range(iterations):
        rng.seed(seed)
        words = rng.choices(pool, k=500)
        if words[0] == tokenizer.bos_token:  # skip spam warning of double BOS
            # leave one starting BOS
            while len(words) > 1 and words[1] == tokenizer.bos_token:
                del words[0]
            # drop all starting BOS
            if tokenizer.add_bos_token:
                del words[0]
        yield "".join(words)
<span style="color:#666; user-select:none;">198</span>  
<span style="color:#666; user-select:none;">199</span>  
def generator_random_chars(iterations=100) -> Iterator[str]:
    """Yield random texts built from a small fixed alphabet.

    Each text has 300-400 words of 1-7 characters, every word followed by
    one whitespace character. The iteration index is the RNG seed.
    """

    gaps = [" "] * 20 + ["\n"] * 5 + ["\r", "\n"] * 5 + ["\t"] * 5
    alphabet = sorted(set("""
        ABCDEFGHIJKLMNOPQRSTUVWXYZ
        abcdefghijklmnopqrstuvwxyz
        ÁÉÍÓÚÀÈÌÒÙÂÊÎÔÛÄËÏÖÜ
        áéíóúàèìòùâêîôûäëïöü
        .-,*/-+ª!"·$%&/()=?¿[]{}<>\\|@#~½¬~;:_
    """))

    rng = random.Random()
    for seed in range(iterations):
        rng.seed(seed)
        pieces = []
        for _ in range(rng.randint(300, 400)):
            size = rng.randint(1, 7)
            letters = rng.choices(alphabet, k=size)
            pieces.append("".join(letters) + rng.choice(gaps))
        yield "".join(pieces)
<span style="color:#666; user-select:none;">223</span>  
<span style="color:#666; user-select:none;">224</span>  
def generator_random_vocab_chars(vocab: list[str], iterations=100) -> Iterator[str]:
    """Yield random 1024-character texts drawn from the characters used in ``vocab``.

    The iteration index is the RNG seed, so the output is reproducible.
    """

    alphabet = sorted({ch for word in vocab for ch in word})

    rng = random.Random()
    for seed in range(iterations):
        rng.seed(seed)
        yield "".join(rng.choices(alphabet, k=1024))
<span style="color:#666; user-select:none;">238</span>  
<span style="color:#666; user-select:none;">239</span>  
def generator_random_vocab_words(vocab: list[str], iterations=100) -> Iterator[str]:
    """Yield the stripped vocab words, then random texts assembled from them.

    Each random text has 300-400 groups of 1-3 concatenated words, every
    group followed by one separator character. The iteration index is the
    RNG seed.
    """

    stripped = [word.strip() for word in vocab]
    for word in stripped:
        yield word

    rng = random.Random()
    for seed in range(iterations):
        rng.seed(seed)
        pieces = []
        for _ in range(rng.randint(300, 400)):
            count = rng.randint(1, 3)
            group = rng.choices(stripped, k=count)
            pieces.append("".join(group) + rng.choice("     \n\r\t"))
        yield "".join(pieces)
<span style="color:#666; user-select:none;">257</span>  
<span style="color:#666; user-select:none;">258</span>  
def generator_random_bytes(iterations=100) -> Iterator[str]:
    """Yield random texts whose characters come from random byte values.

    Each text has 300-400 words built from 1-8 random bytes; zero bytes are
    dropped and every other byte becomes the character with that code point.
    Every word is followed by one whitespace character. The iteration index
    is the RNG seed.
    """

    gaps = [" "] * 20 + ["\n"] * 5 + ["\r", "\n"] * 5 + ["\t"] * 5

    rng = random.Random()
    for seed in range(iterations):
        rng.seed(seed)
        pieces = []
        for _ in range(rng.randint(300, 400)):
            size = rng.randint(1, 8)
            chars = "".join(map(chr, filter(None, rng.randbytes(size))))
            pieces.append(chars + rng.choice(gaps))
        yield "".join(pieces)
<span style="color:#666; user-select:none;">275</span>  
<span style="color:#666; user-select:none;">276</span>  
<span style="color:#666; user-select:none;">277</span>  def test_compare_tokenizer(func_tokenize1: Callable, func_tokenize2: Callable, generator: Iterator[str]):
<span style="color:#666; user-select:none;">278</span>  
<span style="color:#666; user-select:none;">279</span>      def find_first_mismatch(ids1: list[int], ids2: list[int]):
<span style="color:#666; user-select:none;">280</span>          for i, (a, b) in enumerate(zip(ids1, ids2)):
<span style="color:#666; user-select:none;">281</span>              if a != b:
<span style="color:#666; user-select:none;">282</span>                  return i
<span style="color:#666; user-select:none;">283</span>          if len(ids1) == len(ids2):
<span style="color:#666; user-select:none;">284</span>              return -1
<span style="color:#666; user-select:none;">285</span>          return min(len(ids1), len(ids2))
<span style="color:#666; user-select:none;">286</span>  
<span style="color:#666; user-select:none;">287</span>      t0 = time.perf_counter()
<span style="color:#666; user-select:none;">288</span>      logger.info(&quot;%s: %s&quot; % (generator.__name__, &quot;ini&quot;))
<span style="color:#666; user-select:none;">289</span>      for text in generator:
<span style="color:#666; user-select:none;">290</span>          ids1 = func_tokenize1(text)
<span style="color:#666; user-select:none;">291</span>          ids2 = func_tokenize2(text)
<span style="color:#666; user-select:none;">292</span>          if ids1 != ids2:
<span style="color:#666; user-select:none;">293</span>              i = find_first_mismatch(ids1, ids2)
<span style="color:#666; user-select:none;">294</span>              ids1 = list(ids1)[max(0, i - 2) : i + 5 + 1]
<span style="color:#666; user-select:none;">295</span>              ids2 = list(ids2)[max(0, i - 2) : i + 5 + 1]
<span style="color:#666; user-select:none;">296</span>              logger.info(&quot; TokenIDs: &quot; + str(ids1))
<span style="color:#666; user-select:none;">297</span>              logger.info(&quot; Expected: &quot; + str(ids2))
<span style="color:#666; user-select:none;">298</span>              raise Exception()
<span style="color:#666; user-select:none;">299</span>      t1 = time.perf_counter()
<span style="color:#666; user-select:none;">300</span>      logger.info(&quot;%s: end, time: %.3f secs&quot; % (generator.__name__, t1 - t0))
<span style="color:#666; user-select:none;">301</span>  
<span style="color:#666; user-select:none;">302</span>  
def main(argv: list[str] = None):
    """Compare libllama tokenization with a reference tokenizer over all text generators.

    Args:
        argv: Argument list for argparse: ``vocab_file``, ``dir_tokenizer`` and
            the optional ``--verbose`` flag. ``None`` lets argparse read
            ``sys.argv``.

    The loaded model is always freed, including when a comparison raises.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("vocab_file", help="path to vocab 'gguf' file")
    parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    args = parser.parse_args(argv)

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    model = LibLlamaModel(LibLlama(), args.vocab_file, mparams=dict(vocab_only=True), cparams=dict(n_ctx=4096))
    try:
        tokenizer = AutoTokenizer.from_pretrained(args.dir_tokenizer)

        def func_tokenize1(text: str):
            return model.tokenize(text, add_special=True, parse_special=True)

        def func_tokenize2(text: str):
            return tokenizer.encode(text, add_special_tokens=True)

        # Probe the reference tokenizer: if it prepends BOS to a one-letter
        # text, use that as the fallback for a missing `add_bos_token`.
        ids = func_tokenize2("a")
        assert 1 <= len(ids) <= 3
        add_bos_token = len(ids) > 1 and tokenizer.bos_token_id == ids[0]
        tokenizer.add_bos_token = getattr(tokenizer, "add_bos_token", add_bos_token)

        vocab = list(sorted(tokenizer.batch_decode(list(tokenizer.get_vocab().values()), skip_special_tokens=True)))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_custom_text())
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_custom_text_edge_cases())
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_vocab_words(vocab))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_added_lr_strip(tokenizer))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_added_tokens(tokenizer, 10_000))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_chars(10_000))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_vocab_chars(vocab, 10_000))
        test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_vocab_words(vocab, 5_000))
        # test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_bytes(10_000)) # FAIL
    finally:
        # A mismatch raises out of test_compare_tokenizer; release the native
        # model and context on that path too.
        model.free()
<span style="color:#666; user-select:none;">338</span>  
<span style="color:#666; user-select:none;">339</span>  
if __name__ == "__main__":
    # Batch mode: rather than calling main() on the real command line, run
    # the comparison once for each tokenizer listed below.

    path_tokenizers   = "./models/tokenizers/"
    path_vocab_format = "./models/ggml-vocab-%s.gguf"

    # Alternative source of names: os.listdir(path_tokenizers) (needs `import os`).
    tokenizers = [
        "llama-spm",   # SPM
        "phi-3",       # SPM
        "jina-v2-en",  # WPM
        "bert-bge",    # WPM
    ]

    banner_rule = "=" * 50
    for name in tokenizers:
        print("\n" + banner_rule + "\n" + name + "\n")  # noqa
        main([path_vocab_format % name, path_tokenizers + "/" + name, "--verbose"])
</pre>
        </div>
        </div>

</div>
</main>
</div>
</div>


</body>
</html>

