llama.vim
" Requires an already running llama.cpp server
" To install either copy or symlink to ~/.vim/autoload/llama.vim
" Then start with either :call llama#doLlamaGen(),
" or add a keybind to your vimrc such as
" nnoremap Z :call llama#doLlamaGen()<CR>
" Similarly, you could add an insert mode keybind with
" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
"
" g:llama_api_url, g:llama_api_key and g:llama_overrides can be configured in your .vimrc
" let g:llama_api_url = "192.168.1.10:8080"
" llama_overrides can also be set through buffer/window scopes. For instance
" autocmd filetype python let b:llama_overrides = {"temp": 0.2}
" Could be added to your .vimrc to automatically set a lower temperature when
" editing a python script
" Additionally, an override dict can be stored at the top of a file
" !*{"stop": ["User:"]}
" Could be added to the start of your chatlog.txt to set the stopping token
" These parameter dicts are merged together from lowest to highest priority:
" server default -> g:llama_overrides -> w:llama_overrides ->
" b:llama_overrides -> in file (!*) overrides
"
" Sublists (like logit_bias and stop) are overridden, not merged
" Example override:
" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}

if !exists("g:llama_api_url")
  let g:llama_api_url = "127.0.0.1:8080"
endif
if !exists("g:llama_overrides")
  let g:llama_overrides = {}
endif

" Base request parameters; overridden by g:/w:/b:/in-file (!*) dicts above.
const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
" Template curl argv. Slot [2] is replaced with the JSON payload and slot [8]
" with the endpoint URL before each request.
const s:curlcommand = ['curl', '--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
" Maps buffer number -> line where the next streamed chunk is appended.
let s:linedict = {}

" Streaming callback for the /completion job. Each a:msg is one line of the
" server's response; server-sent-event lines arrive as "data: {json}".
" Appends the decoded 'content' text to buffer a:bufn at the tracked line.
func s:callbackHandler(bufn, channel, msg)
  if len(a:msg) < 3
    " Ignore empty / keep-alive lines
    return
  elseif a:msg[0] == "d"
    " Strip the leading "data: " of a server-sent event line
    let l:msg = a:msg[6:-1]
  else
    let l:msg = a:msg
  endif
  let l:decoded_msg = json_decode(l:msg)
  " Keep empty trailing segments so newlines in 'content' are preserved
  let l:newtext = split(l:decoded_msg['content'], "\n", 1)
  if len(l:newtext) > 0
    " First segment continues the current line in the buffer
    call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. l:newtext[0])
  else
    echo "nothing genned"
  endif
  if len(l:newtext) > 1
    " Remaining segments become new lines; advance the insertion point
    let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], l:newtext[1:-1])
    let s:linedict[a:bufn] = s:linedict[a:bufn] + len(l:newtext) - 1
  endif
  if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
    echo "Finished generation"
  endif
endfunction

" Sends the current buffer (plus merged overrides) as a prompt to the
" llama.cpp server and streams the completion back into the buffer.
" Calling it again while a job is running stops that job instead.
func llama#doLlamaGen()
  if exists("b:job")
    if job_status(b:job) == "run"
      call job_stop(b:job)
      return
    endif
  endif

  let l:cbuffer = bufnr("%")
  " Generated text is appended starting at the current last line
  let s:linedict[l:cbuffer] = line('$')
  " Read the whole buffer (previously capped at the first 1000 lines,
  " which silently truncated long prompts)
  let l:buflines = getbufline(l:cbuffer, 1, '$')
  let l:querydata = copy(s:querydata)
  " Merge overrides lowest-to-highest priority: global, window, buffer, in-file
  call extend(l:querydata, g:llama_overrides)
  if exists("w:llama_overrides")
    call extend(l:querydata, w:llama_overrides)
  endif
  if exists("b:llama_overrides")
    call extend(l:querydata, b:llama_overrides)
  endif
  " A first line starting with !* is a JSON override dict, not prompt text
  if l:buflines[0][0:1] == '!*'
    let l:userdata = json_decode(l:buflines[0][2:-1])
    call extend(l:querydata, l:userdata)
    let l:buflines = l:buflines[1:-1]
  endif
  let l:querydata.prompt = join(l:buflines, "\n")
  let l:curlcommand = copy(s:curlcommand)
  if exists("g:llama_api_key")
    call extend(l:curlcommand, ['--header', 'Authorization: Bearer ' .. g:llama_api_key])
  endif
  let l:curlcommand[2] = json_encode(l:querydata)
  let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
endfunction

" Echos the tokkenization of the provided string , or cursor to end of word
" Onus is placed on the user to include the preceding space
func llama#tokenizeWord(...)
  if (a:0 > 0)
    let l:input = a:1
  else
    " Yank cursor-to-end-of-word into the * register and tokenize that
    exe "normal \"*ye"
    let l:input = @*
  endif
  let l:querydata = {"content": l:input}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
endfunction

" Prints the original text alongside the token ids returned by /tokenize.
func s:tokenizeWordCallback(plaintext, channel, msg)
  echo '"' .. a:plaintext .. '" - ' .. string(json_decode(a:msg).tokens)
endfunction

" Echos the token count of the entire buffer (or provided string)
" Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
  if (a:0 > 0)
    let l:buflines = a:1
  else
    " Count the whole buffer (previously capped at the first 1000 lines)
    let l:buflines = getline(1, '$')
    " Skip a leading !* override line so it is not counted as prompt text
    if l:buflines[0][0:1] == '!*'
      let l:buflines = l:buflines[1:-1]
    endif
    let l:buflines = join(l:buflines, "\n")
  endif
  let l:querydata = {"content": l:buflines}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
endfunction

" Prints the number of tokens in the /tokenize response.
func s:tokenCountCallback(channel, msg)
  let resp = json_decode(a:msg)
  echo len(resp.tokens)
endfunction