diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h index e97f64b7c1d..796dde88014 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h +++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h @@ -102,6 +102,9 @@ class LhdTokenGenerator : public TokenGenerator { AttentionSinkRopeRunner* attention_sink_rope_runner) override; private: + // Bring base class's virtual prepare_io into scope so the overload below + // does not hide it (-Woverloaded-virtual). + using TokenGenerator::prepare_io; /** * @brief Fill in I/O buffers with prompt token and position. * @param cur_token Current token. diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h index 83da9e7a6ba..7494afec6da 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h +++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h @@ -108,6 +108,9 @@ class MultimodalLhdTokenGenerator AttentionSinkRopeRunner* attention_sink_rope_runner) override; private: + // Bring base class's virtual prepare_io into scope so the overload below + // does not hide it (-Woverloaded-virtual). + using TokenGenerator::prepare_io; /** * @brief Fill in I/O buffers with prompt token and position. * @param cur_token Current token. diff --git a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h index 0790985d231..599f7050d83 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h +++ b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h @@ -40,6 +40,8 @@ class PromptProcessor { const std::string& method_name, Metadata metadata); + virtual ~PromptProcessor() = default; + /** * @brief Initialize I/O tensor and allocate I/O data buffer. * @param buffer_manager Pointer to IMemAlloc instance; by default, it uses a diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index 0e9b7860dbd..0a4a8b9abb5 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -102,6 +102,7 @@ Runner::Runner( std::unique_ptr tokenizer, std::unique_ptr attention_sink_rope_module) : module_(std::move(module)), + attention_sink_rope_module_(std::move(attention_sink_rope_module)), ngram_(ngram), window_(window), gcap_(gcap), @@ -111,8 +112,7 @@ Runner::Runner( temperature_(temperature), eval_mode_(static_cast(eval_mode)), shared_buffer_(shared_buffer), - tokenizer_(std::move(tokenizer)), - attention_sink_rope_module_(std::move(attention_sink_rope_module)) { + tokenizer_(std::move(tokenizer)) { stats_.reset(); if (decoder_model_version == "llama2") {