LLAMA: added APIs to set penalty_freq and penalty_present

Chris Warren-Smith · Chris Warren-Smith · commit 4399cbcd1342 · 2026-05-01T20:06:04.000+09:30
diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
@@ -130,6 +130,8 @@ target_include_directories(llm PRIVATE
 target_link_libraries(llm PRIVATE
   llama
   ggml
+  # force dynamic libm
+  -Wl,-Bdynamic,-lm
 )
 
 # Include all static code into plugin
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
@@ -37,6 +37,8 @@ Llama::Llama() :
   _vocab(nullptr),
   _penalty_last_n(0),
   _penalty_repeat(0),
+  _penalty_freq(0.0f),
+  _penalty_present(0.0f),
   _temperature(0),
   _top_p(0),
   _min_p(0),
@@ -65,6 +67,8 @@ Llama::Llama(Llama &&other) noexcept
   , _last_error(std::move(other._last_error))
   , _penalty_last_n(other._penalty_last_n)
   , _penalty_repeat(other._penalty_repeat)
+  , _penalty_freq(other._penalty_freq)
+  , _penalty_present(other._penalty_present)
   , _temperature(other._temperature)
   , _top_p(other._top_p)
   , _min_p(other._min_p)
@@ -92,6 +96,8 @@ void Llama::reset() {
   _last_error = "";
   _penalty_last_n = 64;
   _penalty_repeat = 1.1f;
+  _penalty_freq = 0.0f;
+  _penalty_present = 0.0f;
   _temperature = 0;
   _top_k = 0;
   _top_p = 1.0f;
@@ -155,7 +161,7 @@ bool Llama::configure_sampler() {
     llama_sampler_chain_add(chain, grammar);
   }
   if (_penalty_last_n != 0 && _penalty_repeat != 1.0f) {
-    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, 0.0f, 0.0f);
+    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, _penalty_freq, _penalty_present);
     llama_sampler_chain_add(chain, penalties);
   }
   if (_temperature <= 0.0f) {
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
@@ -60,6 +60,8 @@ struct Llama {
   void clear_stops() { _stop_sequences.clear(); }
   void set_penalty_last_n(int32_t penalty_last_n) { _penalty_last_n = penalty_last_n; }
   void set_penalty_repeat(float penalty_repeat) { _penalty_repeat = penalty_repeat; }
+  void set_penalty_freq(float penalty_freq) { _penalty_freq = penalty_freq; }
+  void set_penalty_present(float penalty_present) { _penalty_present = penalty_present; }
   void set_max_tokens(int max_tokens) { _max_tokens = max_tokens; }
   void set_min_p(float min_p) { _min_p = min_p; }
   void set_temperature(float temperature) { _temperature = temperature; }
@@ -90,6 +92,8 @@ struct Llama {
   string _last_error;
   int32_t _penalty_last_n;
   float _penalty_repeat;
+  float _penalty_freq;
+  float _penalty_present;
   float _temperature;
   float _top_p;
   float _min_p;
diff --git a/llama/llama.cpp b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit e365e658f07b63371489570dfde597f199b26c23
+Subproject commit aab68217b7bd8907135dd41fbb5bcb85fca06045
diff --git a/llama/main.cpp b/llama/main.cpp
@@ -104,6 +104,46 @@ static int cmd_llama_set_penalty_repeat(var_s *self, int argc, slib_par_t *arg,
   return result;
 }
 
+// Callback registered as "set_penalty_freq": passes one numeric argument to Llama::set_penalty_freq.
+// llama.set_penalty_freq(0.8)
+// Returns 1 once the value has been applied; 0 if argc != 1 or the instance id lookup yields -1.
+static int cmd_llama_set_penalty_freq(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_penalty_freq", 1, 1); // presumably reports "expected exactly 1 argument" (min 1, max 1) - confirm against error()
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) { // -1 means no instance id was resolved from self; result stays 0
+      Llama &llama = g_llama.at(id);
+      auto value = get_param_num(argc, arg, 0, 0); // presumably argument 0 with a default of 0 - confirm against get_param_num()
+      llama.set_penalty_freq(value);
+      v_setreal(map_add_var(self, "penalty_freq", 0), value); // also writes the value into the "penalty_freq" entry of self
+      result = 1;
+    }
+  }
+  return result;
+}
+
+// Callback registered as "set_penalty_present": passes one numeric argument to Llama::set_penalty_present.
+// llama.set_penalty_present(0.8)
+// Returns 1 once the value has been applied; 0 if argc != 1 or the instance id lookup yields -1.
+static int cmd_llama_set_penalty_present(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_penalty_present", 1, 1); // presumably reports "expected exactly 1 argument" (min 1, max 1) - confirm against error()
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) { // -1 means no instance id was resolved from self; result stays 0
+      Llama &llama = g_llama.at(id);
+      auto value = get_param_num(argc, arg, 0, 0); // presumably argument 0 with a default of 0 - confirm against get_param_num()
+      llama.set_penalty_present(value);
+      v_setreal(map_add_var(self, "penalty_present", 0), value); // also writes the value into the "penalty_present" entry of self
+      result = 1;
+    }
+  }
+  return result;
+}
+
 //
 // llama.set_penalty_last_n(0.8)
 //
@@ -404,6 +444,8 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
     v_create_callback(retval, "generate", cmd_llama_generate);
     v_create_callback(retval, "reset", cmd_llama_reset);
     v_create_callback(retval, "set_penalty_repeat", cmd_llama_set_penalty_repeat);
+    v_create_callback(retval, "set_penalty_freq", cmd_llama_set_penalty_freq);
+    v_create_callback(retval, "set_penalty_present", cmd_llama_set_penalty_present);
     v_create_callback(retval, "set_penalty_last_n", cmd_llama_set_penalty_last_n);
     v_create_callback(retval, "set_max_tokens", cmd_llama_set_max_tokens);
     v_create_callback(retval, "set_min_p", cmd_llama_set_min_p);

Original file line number	Diff line number	Diff line change
`@@ -130,6 +130,8 @@ target_include_directories(llm PRIVATE`
`130`	`130`	`target_link_libraries(llm PRIVATE`
`131`	`131`	`llama`
`132`	`132`	`ggml`
	`133`	`+ # force dynamic libm`
	`134`	`+ -Wl,-Bdynamic,-lm`
`133`	`135`	`)`
`134`	`136`
`135`	`137`	`# Include all static code into plugin`