Skip to content

Commit 955ac33

Browse files
committed
Update llama.cpp API 20260310
1 parent 7efcb2b commit 955ac33

1 file changed

Lines changed: 58 additions & 16 deletions

File tree

llama_cpp/llama_cpp.py

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,47 @@ def llama_numa_init(numa: int, /):
12321232
# TODO: Add llama_detach_threadpool
12331233

12341234

1235+
# typedef void (*llama_model_set_tensor_data_t)(struct ggml_tensor * tensor, void * userdata);
1236+
llama_model_set_tensor_data_t = ctypes.CFUNCTYPE(
1237+
None,
1238+
ctypes.c_void_p,
1239+
ctypes.c_void_p
1240+
)
1241+
1242+
1243+
# // Create a new model from GGUF metadata as well as a function to set the tensor data
1244+
# // - tensors are created as GGML_TYPE_F32 by default,
1245+
# // override by adding a tensor with the same name but a different type to the context
1246+
# LLAMA_API struct llama_model * llama_model_init_from_user(
1247+
# struct gguf_context * metadata,
1248+
# llama_model_set_tensor_data_t set_tensor_data, // function to initialize tensor data with
1249+
# void * set_tensor_data_ud, // userdata for function
1250+
# struct llama_model_params params);
1251+
@ctypes_function(
1252+
"llama_model_init_from_user",
1253+
[
1254+
ctypes.c_void_p,
1255+
llama_model_set_tensor_data_t,
1256+
ctypes.c_void_p,
1257+
llama_model_params
1258+
],
1259+
llama_model_p_ctypes,
1260+
)
1261+
def llama_model_init_from_user(
1262+
metadata: ctypes.c_void_p,
1263+
set_tensor_data: llama_model_set_tensor_data_t,
1264+
set_tensor_data_ud: ctypes.c_void_p,
1265+
params: llama_model_params,
1266+
/
1267+
) -> Optional[llama_model_p]:
1268+
"""
1269+
Create a new model from GGUF metadata as well as a function to set the tensor data
1270+
- tensors are created as GGML_TYPE_F32 by default,
1271+
override by adding a tensor with the same name but a different type to the context
1272+
"""
1273+
...
1274+
1275+
12351276
# DEPRECATED(LLAMA_API struct llama_model * llama_load_model_from_file(
12361277
# const char * path_model,
12371278
# struct llama_model_params params),
@@ -1247,7 +1288,7 @@ def llama_load_model_from_file(
12471288
...
12481289

12491290

1250-
# // Load the model from a file
1291+
# // Load a model from a file
12511292
# // If the file is split into multiple parts, the file name must follow this pattern: <name>-%05d-of-%05d.gguf
12521293
# // If the split file name does not follow this pattern, use llama_model_load_from_splits
12531294
# LLAMA_API struct llama_model * llama_model_load_from_file(
@@ -1261,15 +1302,15 @@ def llama_load_model_from_file(
12611302
def llama_model_load_from_file(
12621303
path_model: bytes, params: llama_model_params, /
12631304
) -> Optional[llama_model_p]:
1264-
"""Load the model from a file
1265-
1305+
"""
1306+
Load a model from a file
12661307
If the file is split into multiple parts, the file name must follow this pattern: <name>-%05d-of-%05d.gguf
1267-
1268-
If the split file name does not follow this pattern, use llama_model_load_from_splits"""
1308+
If the split file name does not follow this pattern, use llama_model_load_from_splits
1309+
"""
12691310
...
12701311

12711312

1272-
# // Load the model from multiple splits (support custom naming scheme)
1313+
# // Load a model from multiple splits (support custom naming scheme)
12731314
# // The paths must be in the correct order
12741315
# LLAMA_API struct llama_model * llama_model_load_from_splits(
12751316
# const char ** paths,
@@ -1283,9 +1324,10 @@ def llama_model_load_from_file(
12831324
def llama_model_load_from_splits(
12841325
paths: list[bytes], n_paths: int, params: llama_model_params, /
12851326
) -> Optional[llama_model_p]:
1286-
"""Load the model from multiple splits (support custom naming scheme)
1287-
1288-
The paths must be in the correct order"""
1327+
"""
1328+
Load a model from multiple splits (support custom naming scheme)
1329+
The paths must be in the correct order
1330+
"""
12891331
...
12901332

12911333

@@ -2982,7 +3024,7 @@ def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:
29823024

29833025
# // Logits for the ith token. For positive indices, Equivalent to:
29843026
# // llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab
2985-
# // Negative indicies can be used to access logits in reverse order, -1 is the last logit.
3027+
# // Negative indices can be used to access logits in reverse order, -1 is the last logit.
29863028
# // returns NULL for invalid ids.
29873029
# LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i);
29883030
@ctypes_function(
@@ -3017,7 +3059,7 @@ def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]
30173059

30183060
# // Get the embeddings for the ith token. For positive indices, Equivalent to:
30193061
# // llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
3020-
# // Negative indicies can be used to access embeddings in reverse order, -1 is the last embedding.
3062+
# // Negative indices can be used to access embeddings in reverse order, -1 is the last embedding.
30213063
# // shape: [n_embd] (1-dimensional)
30223064
# // returns NULL for invalid ids.
30233065
# LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
@@ -3076,9 +3118,9 @@ def llama_get_sampled_token_ith(
30763118
...
30773119

30783120

3079-
# // Get the backend sampled probabilites for the ith token
3121+
# // Get the backend sampled probabilities for the ith token
30803122
# // The index matches llama_get_sampled_token_ith().
3081-
# // Returns NULL if no probabilites were generated.
3123+
# // Returns NULL if no probabilities were generated.
30823124
# LLAMA_API float * llama_get_sampled_probs_ith (struct llama_context * ctx, int32_t i);
30833125
@ctypes_function(
30843126
"llama_get_sampled_probs_ith",
@@ -3089,9 +3131,9 @@ def llama_get_sampled_probs_ith(
30893131
ctx: llama_context_p, i: ctypes.c_int32, /
30903132
) -> CtypesArray[ctypes.c_float]:
30913133
"""
3092-
Get the backend sampled probabilites for the ith token
3134+
Get the backend sampled probabilities for the ith token
30933135
The index matches llama_get_sampled_token_ith().
3094-
Returns NULL if no probabilites were generated.
3136+
Returns NULL if no probabilities were generated.
30953137
"""
30963138
...
30973139

@@ -4345,7 +4387,7 @@ def llama_sampler_init_mirostat_v2(
43454387
...
43464388

43474389

4348-
# /// @details Intializes a GBNF grammar, see grammars/README.md for details.
4390+
# /// @details Initializes a GBNF grammar, see grammars/README.md for details.
43494391
# /// @param vocab The vocabulary that this grammar will be used with.
43504392
# /// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
43514393
# /// @param grammar_root The name of the start symbol for the grammar.

0 commit comments

Comments
 (0)