// ggml-alloc.h
1 #pragma once 2 3 #include "ggml.h" 4 5 #ifdef __cplusplus 6 extern "C" { 7 #endif 8 9 typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; 10 typedef struct ggml_backend_buffer * ggml_backend_buffer_t; 11 typedef struct ggml_backend * ggml_backend_t; 12 13 // Tensor allocator 14 struct ggml_tallocr { 15 ggml_backend_buffer_t buffer; 16 void * base; 17 size_t alignment; 18 size_t offset; 19 }; 20 21 GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); 22 GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); 23 24 // Graph allocator 25 /* 26 Example usage: 27 ggml_gallocr_t galloc = ggml_gallocr_new(ggml_bacckend_cpu_buffer_type()); 28 29 // optional: create a worst-case graph and reserve the buffers to avoid reallocations 30 ggml_gallocr_reserve(galloc, build_graph(max_batch)); 31 32 // allocate the graph 33 struct ggml_cgraph * graph = build_graph(batch); 34 ggml_gallocr_alloc_graph(galloc, graph); 35 36 printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); 37 38 // evaluate the graph 39 ggml_backend_graph_compute(backend, graph); 40 */ 41 42 // special tensor flags for use with the graph allocator: 43 // ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses 44 // ggml_set_output(): output tensors are never freed and never overwritten 45 46 typedef struct ggml_gallocr * ggml_gallocr_t; 47 48 GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); 49 GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); 50 GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); 51 52 // pre-allocate buffers from a measure graph - does not allocate or modify the graph 53 // call with a worst-case graph to avoid buffer reallocations 54 // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed 55 
// returns false if the buffer allocation failed 56 GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); 57 GGML_API bool ggml_gallocr_reserve_n( 58 ggml_gallocr_t galloc, 59 struct ggml_cgraph * graph, 60 const int * node_buffer_ids, 61 const int * leaf_buffer_ids); 62 63 // automatic reallocation if the topology changes when using a single buffer 64 // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) 65 GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); 66 67 GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); 68 69 // Utils 70 // Create a buffer and allocate all the tensors in a ggml_context 71 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); 72 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); 73 74 #ifdef __cplusplus 75 } 76 #endif