" examples/llama.vim
  1  " Requires an already running llama.cpp server
  2  " To install either copy or symlink to ~/.vim/autoload/llama.vim
  3  " Then start with either :call llama#doLlamaGen(),
  4  " or add a keybind to your vimrc such as
  5  " nnoremap Z :call llama#doLlamaGen()<CR>
  6  " Similarly, you could add an insert mode keybind with
  7  " inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
  8  "
  9  " g:llama_api_url, g:llama_api_key and g:llama_overrides can be configured in your .vimrc
 10  " let g:llama_api_url = "192.168.1.10:8080"
 11  " llama_overrides can also be set through buffer/window scopes. For instance
 12  " autocmd filetype python let b:llama_overrides = {"temp": 0.2}
 13  " Could be added to your .vimrc to automatically set a lower temperature when
 14  " editing a python script
 15  " Additionally, an override dict can be stored at the top of a file
 16  " !*{"stop": ["User:"]}
 17  " Could be added to the start of your chatlog.txt to set the stopping token
 18  " These parameter dicts are merged together from lowest to highest priority:
 19  " server default -> g:llama_overrides -> w:llama_overrides ->
 20  " b:llama_overrides -> in file (!*) overrides
 21  "
 22  " Sublists (like logit_bias and stop) are overridden, not merged
 23  " Example override:
 24  " !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
 25  if !exists("g:llama_api_url")
 26      let g:llama_api_url= "127.0.0.1:8080"
 27  endif
 28  if !exists("g:llama_overrides")
 29     let g:llama_overrides = {}
 30  endif
 31  const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
 32  const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
 33  let s:linedict = {}
 34  
 35  func s:callbackHandler(bufn, channel, msg)
 36     if len(a:msg) < 3
 37        return
 38     elseif a:msg[0] == "d"
 39        let l:msg = a:msg[6:-1]
 40     else
 41        let l:msg = a:msg
 42     endif
 43     let l:decoded_msg = json_decode(l:msg)
 44     let l:newtext = split(l:decoded_msg['content'], "\n", 1)
 45     if len(l:newtext) > 0
 46        call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. newtext[0])
 47     else
 48        echo "nothing genned"
 49     endif
 50     if len(newtext) > 1
 51        let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], newtext[1:-1])
 52        let s:linedict[a:bufn] = s:linedict[a:bufn] + len(newtext)-1
 53     endif
 54     if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
 55         echo "Finished generation"
 56     endif
 57  endfunction
 58  
 59  func llama#doLlamaGen()
 60     if exists("b:job")
 61        if job_status(b:job) == "run"
 62           call job_stop(b:job)
 63           return
 64        endif
 65     endif
 66  
 67     let l:cbuffer = bufnr("%")
 68     let s:linedict[l:cbuffer] = line('$')
 69     let l:buflines = getbufline(l:cbuffer, 1, 1000)
 70     let l:querydata = copy(s:querydata)
 71     call extend(l:querydata, g:llama_overrides)
 72     if exists("w:llama_overrides")
 73        call extend(l:querydata, w:llama_overrides)
 74     endif
 75     if exists("b:llama_overrides")
 76        call extend(l:querydata, b:llama_overrides)
 77     endif
 78     if l:buflines[0][0:1] == '!*'
 79        let l:userdata = json_decode(l:buflines[0][2:-1])
 80        call extend(l:querydata, l:userdata)
 81        let l:buflines = l:buflines[1:-1]
 82     endif
 83     let l:querydata.prompt = join(l:buflines, "\n")
 84     let l:curlcommand = copy(s:curlcommand)
 85     if exists("g:llama_api_key")
 86         call extend(l:curlcommand, ['--header', 'Authorization: Bearer ' .. g:llama_api_key])
 87     endif
 88     let l:curlcommand[2] = json_encode(l:querydata)
 89     let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
 90  endfunction
 91  
" Echoes the tokenization of the provided string, or of cursor to end of word
" Onus is placed on the user to include the preceding space
 94  func llama#tokenizeWord(...)
 95      if (a:0 > 0)
 96          let l:input = a:1
 97      else
 98          exe "normal \"*ye"
 99          let l:input = @*
100      endif
101      let l:querydata = {"content": l:input}
102      let l:curlcommand = copy(s:curlcommand)
103      let l:curlcommand[2] = json_encode(l:querydata)
104      let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
105     let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
106  endfunction
107  
func s:tokenizeWordCallback(plaintext, channel, msg)
    " Show the original text alongside the token ids the server returned.
    let l:tokens = json_decode(a:msg).tokens
    echo '"' .. a:plaintext ..'" - ' .. string(l:tokens)
endfunction
111  
112  
" Echoes the token count of the entire buffer (or provided string)
" Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
    " Count tokens in the optional string argument or, with no argument,
    " in the current buffer's text.
    if a:0 > 0
        let l:text = a:1
    else
        " NOTE(review): capped at the first 1000 lines, matching
        " llama#doLlamaGen - presumably a deliberate prompt-size limit.
        let l:lines = getline(1, 1000)
        " A leading in-file override line (!*{...}) is not prompt text.
        if l:lines[0][0:1] == '!*'
            let l:lines = l:lines[1:-1]
        endif
        let l:text = join(l:lines, "\n")
    endif
    let l:cmd = copy(s:curlcommand)
    let l:cmd[2] = json_encode({"content": l:text})
    let l:cmd[8] = g:llama_api_url .. "/tokenize"
    let s:token_job = job_start(l:cmd, {"callback": "s:tokenCountCallback"})
endfunction
131  
func s:tokenCountCallback(channel, msg)
    " Print how many tokens the server's /tokenize response contains.
    echo len(json_decode(a:msg).tokens)
endfunction