
Commit 7efcb2b

fix(chat_format): fix namespace and variable shadowing of llama modules
- Changed imports to use `llama_cpp_lib` and `llama_core` to avoid namespace collisions.
- Fixed severe variable shadowing where the `llama` module was shadowed by the `llama` parameter in function signatures.
- Updated associated type hints and C-API bindings to use the new isolated aliases.
- Corrected `LlamaGrammar` type definitions to point to the `llama_grammar` module.
1 parent 5e285fe commit 7efcb2b
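For context, the shadowing this commit fixes looks like the following. This is a minimal sketch, not code from the diff; `handler` and its body are hypothetical, but the module path and the `StoppingCriteriaList` type come from the changes below.

    import llama_cpp.llama as llama  # before: the module is bound to the name `llama`

    def handler(llama: "llama.Llama"):
        # Inside the body, `llama` is the bound Llama *instance*, not the module,
        # so module attributes are unreachable: this raises AttributeError.
        return llama.StoppingCriteriaList([])

    import llama_cpp.llama as llama_core  # after: a distinct alias for the module

    def handler_fixed(llama: "llama_core.Llama"):
        # The instance and the module now have separate names.
        return llama_core.StoppingCriteriaList([])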

1 file changed: llama_cpp/llama_chat_format.py (27 additions, 27 deletions)
@@ -33,8 +33,8 @@
 import urllib.request
 from urllib.error import URLError, HTTPError

-import llama_cpp.llama_cpp as llama_cpp
-import llama_cpp.llama as llama
+import llama_cpp.llama_cpp as llama_cpp_lib
+import llama_cpp.llama as llama_core
 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar

@@ -85,7 +85,7 @@ def __call__(
         self,
         *,
         # llama.cpp instance
-        llama: llama.Llama,
+        llama: llama_core.Llama,
         # openai api parameters
         messages: List[llama_types.ChatCompletionRequestMessage],
         functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
@@ -124,8 +124,8 @@ def __call__(
         adaptive_target : float = -1.0,
         adaptive_decay : float = 0.9,
         use_infill: bool = False,
-        logits_processor: Optional[llama.LogitsProcessorList] = None,
-        grammar: Optional[llama.LlamaGrammar] = None,
+        logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+        grammar: Optional[llama_grammar.LlamaGrammar] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
         **kwargs, # type: ignore
@@ -199,7 +199,7 @@ class ChatFormatterResponse:

     prompt: str
     stop: Optional[Union[str, List[str]]] = None
-    stopping_criteria: Optional[llama.StoppingCriteriaList] = None
+    stopping_criteria: Optional[llama_core.StoppingCriteriaList] = None
     added_special: bool = False

@@ -281,7 +281,7 @@ def stop_on_last_token(
         ) -> bool:
             return tokens[-1] in self.stop_token_ids

-        stopping_criteria = llama.StoppingCriteriaList([stop_on_last_token])
+        stopping_criteria = llama_core.StoppingCriteriaList([stop_on_last_token])

         return ChatFormatterResponse(
             prompt=prompt,
@@ -585,7 +585,7 @@ def chat_formatter_to_chat_completion_handler(
 ) -> LlamaChatCompletionHandler:
     def chat_completion_handler(
         *,
-        llama: llama.Llama,
+        llama: llama_core.Llama,
         messages: List[llama_types.ChatCompletionRequestMessage],
         functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
         function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
@@ -621,8 +621,8 @@ def chat_completion_handler(
         adaptive_decay : float = 0.9,
         use_infill: bool = False,
         model: Optional[str] = None,
-        logits_processor: Optional[llama.LogitsProcessorList] = None,
-        grammar: Optional[llama.LlamaGrammar] = None,
+        logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+        grammar: Optional[llama_grammar.LlamaGrammar] = None,
         logit_bias: Optional[Dict[str, float]] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
@@ -1467,7 +1467,7 @@ def format_gemma(

 @register_chat_completion_handler("functionary")
 def functionary_chat_handler(
-    llama: llama.Llama,
+    llama: llama_core.Llama,
     messages: List[llama_types.ChatCompletionRequestMessage],
     functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
     function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
@@ -1500,8 +1500,8 @@ def functionary_chat_handler(
     adaptive_decay : float = 0.9,
     use_infill: bool = False,
     model: Optional[str] = None,
-    logits_processor: Optional[llama.LogitsProcessorList] = None,
-    grammar: Optional[llama.LlamaGrammar] = None,
+    logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+    grammar: Optional[llama_grammar.LlamaGrammar] = None,
     **kwargs, # type: ignore
 ) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]:
     SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"""
@@ -1856,7 +1856,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
 @register_chat_completion_handler("functionary-v1")
 @register_chat_completion_handler("functionary-v2")
 def functionary_v1_v2_chat_handler(
-    llama: llama.Llama,
+    llama: llama_core.Llama,
     messages: List[llama_types.ChatCompletionRequestMessage],
     functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
     function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
@@ -1889,8 +1889,8 @@ def functionary_v1_v2_chat_handler(
     adaptive_decay : float = 0.9,
     use_infill: bool = False,
     model: Optional[str] = None,
-    logits_processor: Optional[llama.LogitsProcessorList] = None,
-    grammar: Optional[llama.LlamaGrammar] = None,
+    logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+    grammar: Optional[llama_grammar.LlamaGrammar] = None,
     **kwargs, # type: ignore
 ) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]:
     SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"""
@@ -2868,7 +2868,7 @@ def __init__(

         self._exit_stack = ExitStack()

-    def _init_mtmd_context(self, llama_model: llama.Llama):
+    def _init_mtmd_context(self, llama_model: llama_core.Llama):
         """Initialize mtmd context with the llama model."""
         if self.mtmd_ctx is not None:
             return # Already initialized
@@ -3047,7 +3047,7 @@ def _create_bitmap_from_bytes(self, media_bytes: bytes):

     def _process_mtmd_prompt(
         self,
-        llama: llama.Llama,
+        llama: llama_core.Llama,
         messages: List[llama_types.ChatCompletionRequestMessage],
     ) -> Tuple[List[int], List[tuple], Any, List[Any]]:
         """
@@ -3212,7 +3212,7 @@ def _create_bitmap_func(idx: int, item: str):
     def __call__(
         self,
         *,
-        llama: llama.Llama,
+        llama: llama_core.Llama,
         messages: List[llama_types.ChatCompletionRequestMessage],
         functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
         function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
@@ -3248,8 +3248,8 @@ def __call__(
         adaptive_decay : float = 0.9,
         use_infill: bool = False,
         model: Optional[str] = None,
-        logits_processor: Optional[llama.LogitsProcessorList] = None,
-        grammar: Optional[llama.LlamaGrammar] = None,
+        logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+        grammar: Optional[llama_grammar.LlamaGrammar] = None,
         logit_bias: Optional[Dict[str, float]] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
@@ -3367,13 +3367,13 @@ def __call__(
         llama.n_tokens = n_past

         # Execute C++ Multimodal Black-box Extraction
-        new_n_past = llama_cpp.llama_pos(0)
+        new_n_past = llama_cpp_lib.llama_pos(0)
         result = self._mtmd_cpp.mtmd_helper_eval_chunk_single(
             self.mtmd_ctx,
             llama._ctx.ctx,
             chunk_ptr,
-            llama_cpp.llama_pos(n_past),
-            llama_cpp.llama_seq_id(0),
+            llama_cpp_lib.llama_pos(n_past),
+            llama_cpp_lib.llama_seq_id(0),
             llama.n_batch,
             True, # logits_last = True, drastically saves computational overhead
             ctypes.byref(new_n_past)
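The `new_n_past` out-parameter in the hunk above is a standard ctypes idiom. Here is a standalone sketch of it, under two stated assumptions: `llama_pos` is modeled as `ctypes.c_int32` (which appears to be the bindings' typedef), and `fake_eval_chunk` is a hypothetical stand-in for the C helper.

    import ctypes

    llama_pos = ctypes.c_int32  # assumption: mirrors the bindings' llama_pos typedef

    def fake_eval_chunk(n_past, out_pos):
        # Hypothetical stand-in for mtmd_helper_eval_chunk_single: write the
        # advanced position through the out-parameter, return 0 on success.
        out_pos.contents.value = n_past + 7  # pretend the chunk held 7 tokens
        return 0

    new_n_past = llama_pos(0)
    status = fake_eval_chunk(3, ctypes.pointer(new_n_past))
    assert status == 0 and new_n_past.value == 10  # position written through the pointer

`ctypes.byref`, as used in the diff, is a lighter-weight way to pass the same address to a foreign call; `ctypes.pointer` is used here only so the Python stand-in can dereference it.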
@@ -5022,7 +5022,7 @@ def __call__(self, **kwargs):

 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(
-    llama: llama.Llama,
+    llama: llama_core.Llama,
     messages: List[llama_types.ChatCompletionRequestMessage],
     functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
     function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
@@ -5055,8 +5055,8 @@ def chatml_function_calling(
     adaptive_decay : float = 0.9,
     use_infill: bool = False,
     model: Optional[str] = None,
-    logits_processor: Optional[llama.LogitsProcessorList] = None,
-    grammar: Optional[llama.LlamaGrammar] = None,
+    logits_processor: Optional[llama_core.LogitsProcessorList] = None,
+    grammar: Optional[llama_grammar.LlamaGrammar] = None,
     logprobs: Optional[bool] = None,
     top_logprobs: Optional[int] = None,
     **kwargs, # type: ignore
