From ffa0e614e3ec604c4be2619d9cf98aa5410732dc Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sat, 17 Jan 2026 16:22:51 -0600 Subject: [PATCH 01/13] Adding files to get Started --- .gitignore | 4 + LLM.md | 3 + Packages/packages-lock.json | 114 +++--- PythonFiles/something.ipynb | 686 ++++++++++++++++++++++++++++++++++++ ignore.conf | 65 ++++ 5 files changed, 819 insertions(+), 53 deletions(-) create mode 100644 LLM.md create mode 100644 ignore.conf diff --git a/.gitignore b/.gitignore index d44645e..b8dd826 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ * !.gitignore +!ignore.conf !*.md !Packages/ @@ -20,6 +21,9 @@ !Assets/Animation/ !Assets/Animation/** +!Assets/StreamingAssets/ +!Assets/StreamingAssets/Models/ + !PythonFiles/ !PythonFiles/** diff --git a/LLM.md b/LLM.md new file mode 100644 index 0000000..fd7ecbe --- /dev/null +++ b/LLM.md @@ -0,0 +1,3 @@ +For bundling the model use `https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF?show_file_info=Llama-3.2-1B-Instruct-Q4_K_M.gguf&library=llama-cpp-python` and install it using llama-cpp-python library. + +Add this model from `C:\Users\\.cache\huggingface\hub`and add it to Assets/StreamingAssets/Models \ No newline at end of file diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json index dc200af..123b582 100644 --- a/Packages/packages-lock.json +++ b/Packages/packages-lock.json @@ -1,57 +1,63 @@ { "dependencies": { "com.unity.2d.animation": { - "version": "10.2.1", + "version": "13.0.2", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "9.1.1", + "com.unity.2d.common": "12.0.1", "com.unity.2d.sprite": "1.0.0", - "com.unity.collections": "1.2.4", + "com.unity.collections": "2.4.3", "com.unity.modules.animation": "1.0.0", "com.unity.modules.uielements": "1.0.0" }, "url": "https://packages.unity.com" }, "com.unity.2d.aseprite": { - "version": "1.1.9", + "version": "3.0.1", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "6.0.6", + "com.unity.2d.common": "12.0.1", "com.unity.2d.sprite": "1.0.0", + "com.unity.2d.tilemap": "1.0.0", "com.unity.mathematics": "1.2.6", "com.unity.modules.animation": "1.0.0" }, "url": "https://packages.unity.com" }, "com.unity.2d.common": { - "version": "9.1.1", + "version": "12.0.1", "depth": 2, "source": "registry", "dependencies": { "com.unity.burst": "1.8.4", "com.unity.2d.sprite": "1.0.0", + "com.unity.collections": "2.4.3", "com.unity.mathematics": "1.1.0", "com.unity.modules.animation": "1.0.0", - "com.unity.modules.uielements": "1.0.0" + "com.unity.modules.uielements": "1.0.0", + "com.unity.modules.imageconversion": "1.0.0" }, "url": "https://packages.unity.com" }, "com.unity.2d.pixel-perfect": { - "version": "5.0.3", + "version": "5.1.1", "depth": 1, "source": "registry", - "dependencies": {}, + "dependencies": { + "com.unity.modules.imgui": "1.0.0" + }, "url": "https://packages.unity.com" }, "com.unity.2d.psdimporter": { - "version": "9.1.0", + "version": "12.0.1", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "9.1.1", - "com.unity.2d.sprite": "1.0.0" + "com.unity.2d.common": "12.0.1", + "com.unity.2d.sprite": "1.0.0", + "com.unity.2d.tilemap": "1.0.0" }, "url": "https://packages.unity.com" }, @@ -62,11 +68,11 @@ "dependencies": {} }, "com.unity.2d.spriteshape": { - "version": "10.0.7", + "version": "13.0.0", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "9.0.7", + "com.unity.2d.common": "12.0.0", "com.unity.mathematics": "1.1.0", "com.unity.modules.physics2d": "1.0.0" }, @@ -82,7 +88,7 @@ } }, "com.unity.2d.tilemap.extras": { - "version": "4.1.0", + "version": "6.0.1", "depth": 1, "source": "registry", "dependencies": { @@ -92,8 +98,18 @@ }, "url": "https://packages.unity.com" }, + "com.unity.2d.tooling": { + "version": "1.0.0", + "depth": 1, + "source": "registry", + "dependencies": { + "com.unity.2d.common": "12.0.1", + "com.unity.modules.uielements": "1.0.0" + }, + "url": "https://packages.unity.com" + }, "com.unity.burst": { - "version": "1.8.23", + "version": "1.8.27", "depth": 2, "source": "registry", "dependencies": { @@ -110,13 +126,14 @@ "url": "https://packages.unity.com" }, "com.unity.collections": { - "version": "2.5.1", + "version": "2.6.2", "depth": 2, "source": "registry", "dependencies": { - "com.unity.burst": "1.8.17", - "com.unity.test-framework": "1.4.5", - "com.unity.nuget.mono-cecil": "1.11.4", + "com.unity.burst": "1.8.23", + "com.unity.mathematics": "1.3.2", + "com.unity.test-framework": "1.4.6", + "com.unity.nuget.mono-cecil": "1.11.5", "com.unity.test-framework.performance": "3.0.3" }, "url": "https://packages.unity.com" @@ -128,18 +145,19 @@ "dependencies": {} }, "com.unity.feature.2d": { - "version": "2.0.1", + "version": "2.0.2", "depth": 0, "source": "builtin", "dependencies": { - "com.unity.2d.animation": "10.2.1", - "com.unity.2d.pixel-perfect": "5.0.3", - "com.unity.2d.psdimporter": "9.1.0", + "com.unity.2d.animation": "13.0.2", + "com.unity.2d.pixel-perfect": "5.1.1", + "com.unity.2d.psdimporter": "12.0.1", "com.unity.2d.sprite": "1.0.0", - "com.unity.2d.spriteshape": "10.0.7", + "com.unity.2d.spriteshape": "13.0.0", "com.unity.2d.tilemap": "1.0.0", - "com.unity.2d.tilemap.extras": "4.1.0", - "com.unity.2d.aseprite": "1.1.9" + "com.unity.2d.tilemap.extras": "6.0.1", + "com.unity.2d.aseprite": "3.0.1", + "com.unity.2d.tooling": "1.0.0" } }, "com.unity.ide.rider": { @@ -170,14 +188,14 @@ "url": "https://packages.unity.com" }, "com.unity.mathematics": { - "version": "1.3.2", + "version": "1.3.3", "depth": 2, "source": "registry", "dependencies": {}, "url": "https://packages.unity.com" }, "com.unity.multiplayer.center": { - "version": "1.0.0", + "version": "1.0.1", "depth": 0, "source": "builtin", "dependencies": { @@ -185,34 +203,33 @@ } }, "com.unity.nuget.mono-cecil": { - "version": "1.11.4", + "version": "1.11.6", "depth": 3, "source": "registry", "dependencies": {}, "url": "https://packages.unity.com" }, "com.unity.render-pipelines.core": { - "version": "17.0.4", + "version": "17.3.0", "depth": 1, "source": "builtin", "dependencies": { - "com.unity.burst": "1.8.20", + "com.unity.burst": "1.8.14", "com.unity.mathematics": "1.3.2", "com.unity.ugui": "2.0.0", "com.unity.collections": "2.4.3", "com.unity.modules.physics": "1.0.0", "com.unity.modules.terrain": "1.0.0", - "com.unity.modules.jsonserialize": "1.0.0", - "com.unity.rendering.light-transport": "1.0.1" + "com.unity.modules.jsonserialize": "1.0.0" } }, "com.unity.render-pipelines.universal": { - "version": "17.0.4", + "version": "17.3.0", "depth": 0, "source": "builtin", "dependencies": { - "com.unity.render-pipelines.core": "17.0.4", - "com.unity.shadergraph": "17.0.4", + "com.unity.render-pipelines.core": "17.3.0", + "com.unity.shadergraph": "17.3.0", "com.unity.render-pipelines.universal-config": "17.0.3" } }, @@ -224,34 +241,24 @@ "com.unity.render-pipelines.core": "17.0.3" } }, - "com.unity.rendering.light-transport": { - "version": "1.0.1", - "depth": 2, - "source": "builtin", - "dependencies": { - "com.unity.collections": "2.2.0", - "com.unity.mathematics": "1.2.4", - "com.unity.modules.terrain": "1.0.0" - } - }, "com.unity.searcher": { - "version": "4.9.3", + "version": "4.9.4", "depth": 2, "source": "registry", "dependencies": {}, "url": "https://packages.unity.com" }, "com.unity.shadergraph": { - "version": "17.0.4", + "version": "17.3.0", "depth": 1, "source": "builtin", "dependencies": { - "com.unity.render-pipelines.core": "17.0.4", + "com.unity.render-pipelines.core": "17.3.0", "com.unity.searcher": "4.9.3" } }, "com.unity.test-framework": { - "version": "1.5.1", + "version": "1.6.0", "depth": 0, "source": "builtin", "dependencies": { @@ -261,7 +268,7 @@ } }, "com.unity.test-framework.performance": { - "version": "3.1.0", + "version": "3.2.0", "depth": 3, "source": "registry", "dependencies": { @@ -449,7 +456,8 @@ "com.unity.modules.ui": "1.0.0", "com.unity.modules.imgui": "1.0.0", "com.unity.modules.jsonserialize": "1.0.0", - "com.unity.modules.hierarchycore": "1.0.0" + "com.unity.modules.hierarchycore": "1.0.0", + "com.unity.modules.physics": "1.0.0" } }, "com.unity.modules.umbra": { diff --git a/PythonFiles/something.ipynb b/PythonFiles/something.ipynb index 9e5de65..91413b1 100644 --- a/PythonFiles/something.ipynb +++ b/PythonFiles/something.ipynb @@ -157,6 +157,692 @@ " return None\n" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\adity\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "c:\\Users\\adity\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\adity\\.cache\\huggingface\\hub\\models--unsloth--Llama-3.2-1B-Instruct-GGUF. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", + "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", + " warnings.warn(message)\n", + "llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from C:\\Users\\adity\\.cache\\huggingface\\hub\\models--unsloth--Llama-3.2-1B-Instruct-GGUF\\snapshots\\b69aef112e9f895e6f98d7ae0949f72ff09aa401\\.\\Llama-3.2-1B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))\n", + "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", + "llama_model_loader: - kv 0: general.architecture str = llama\n", + "llama_model_loader: - kv 1: general.type str = model\n", + "llama_model_loader: - kv 2: general.name str = Llama-3.2-1B-Instruct\n", + "llama_model_loader: - kv 3: general.finetune str = Instruct\n", + "llama_model_loader: - kv 4: general.basename str = Llama-3.2-1B-Instruct\n", + "llama_model_loader: - kv 5: general.quantized_by str = Unsloth\n", + "llama_model_loader: - kv 6: general.size_label str = 1B\n", + "llama_model_loader: - kv 7: general.repo_url str = https://huggingface.co/unsloth\n", + "llama_model_loader: - kv 8: llama.block_count u32 = 16\n", + "llama_model_loader: - kv 9: llama.context_length u32 = 131072\n", + "llama_model_loader: - kv 10: llama.embedding_length u32 = 2048\n", + "llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192\n", + "llama_model_loader: - kv 12: llama.attention.head_count u32 = 32\n", + "llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8\n", + "llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000\n", + "llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", + "llama_model_loader: - kv 16: llama.attention.key_length u32 = 64\n", + "llama_model_loader: - kv 17: llama.attention.value_length u32 = 64\n", + "llama_model_loader: - kv 18: llama.vocab_size u32 = 128256\n", + "llama_model_loader: - kv 19: llama.rope.dimension_count u32 = 64\n", + "llama_model_loader: - kv 20: tokenizer.ggml.model str = gpt2\n", + "llama_model_loader: - kv 21: tokenizer.ggml.pre str = llama-bpe\n", + "llama_model_loader: - kv 22: tokenizer.ggml.tokens arr[str,128256] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n", + "llama_model_loader: - kv 23: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n", + "llama_model_loader: - kv 24: tokenizer.ggml.merges arr[str,280147] = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n", + "llama_model_loader: - kv 25: tokenizer.ggml.bos_token_id u32 = 128000\n", + "llama_model_loader: - kv 26: tokenizer.ggml.eos_token_id u32 = 128009\n", + "llama_model_loader: - kv 27: tokenizer.ggml.padding_token_id u32 = 128004\n", + "llama_model_loader: - kv 28: tokenizer.ggml.add_bos_token bool = true\n", + "llama_model_loader: - kv 29: tokenizer.chat_template str = {{- bos_token }}\\n{%- if custom_tools ...\n", + "llama_model_loader: - kv 30: general.quantization_version u32 = 2\n", + "llama_model_loader: - kv 31: general.file_type u32 = 15\n", + "llama_model_loader: - kv 32: quantize.imatrix.file str = Llama-3.2-1B-Instruct-GGUF/imatrix_un...\n", + "llama_model_loader: - kv 33: quantize.imatrix.dataset str = unsloth_calibration_Llama-3.2-1B-Inst...\n", + "llama_model_loader: - kv 34: quantize.imatrix.entries_count i32 = 112\n", + "llama_model_loader: - kv 35: quantize.imatrix.chunks_count i32 = 689\n", + "llama_model_loader: - type f32: 34 tensors\n", + "llama_model_loader: - type q4_K: 96 tensors\n", + "llama_model_loader: - type q6_K: 17 tensors\n", + "print_info: file format = GGUF V3 (latest)\n", + "print_info: file type = Q4_K - Medium\n", + "print_info: file size = 762.81 MiB (5.18 BPW) \n", + "init_tokenizer: initializing tokenizer for type 2\n", + "load: control token: 128098 '<|reserved_special_token_90|>' is not marked as EOG\n", + "load: control token: 128191 '<|reserved_special_token_183|>' is not marked as EOG\n", + "load: control token: 128130 '<|reserved_special_token_122|>' is not marked as EOG\n", + "load: control token: 128119 '<|reserved_special_token_111|>' is not marked as EOG\n", + "load: control token: 128136 '<|reserved_special_token_128|>' is not marked as EOG\n", + "load: control token: 128155 '<|reserved_special_token_147|>' is not marked as EOG\n", + "load: control token: 128196 '<|reserved_special_token_188|>' is not marked as EOG\n", + "load: control token: 128101 '<|reserved_special_token_93|>' is not marked as EOG\n", + "load: control token: 128138 '<|reserved_special_token_130|>' is not marked as EOG\n", + "load: control token: 128181 '<|reserved_special_token_173|>' is not marked as EOG\n", + "load: control token: 128034 '<|reserved_special_token_26|>' is not marked as EOG\n", + "load: control token: 128209 '<|reserved_special_token_201|>' is not marked as EOG\n", + "load: control token: 128031 '<|reserved_special_token_23|>' is not marked as EOG\n", + "load: control token: 128050 '<|reserved_special_token_42|>' is not marked as EOG\n", + "load: control token: 128244 '<|reserved_special_token_236|>' is not marked as EOG\n", + "load: control token: 128148 '<|reserved_special_token_140|>' is not marked as EOG\n", + "load: control token: 128198 '<|reserved_special_token_190|>' is not marked as EOG\n", + "load: control token: 128229 '<|reserved_special_token_221|>' is not marked as EOG\n", + "load: control token: 128165 '<|reserved_special_token_157|>' is not marked as EOG\n", + "load: control token: 128246 '<|reserved_special_token_238|>' is not marked as EOG\n", + "load: control token: 128017 '<|reserved_special_token_9|>' is not marked as EOG\n", + "load: control token: 128216 '<|reserved_special_token_208|>' is not marked as EOG\n", + "load: control token: 128161 '<|reserved_special_token_153|>' is not marked as EOG\n", + "load: control token: 128224 '<|reserved_special_token_216|>' is not marked as EOG\n", + "load: control token: 128082 '<|reserved_special_token_74|>' is not marked as EOG\n", + "load: control token: 128004 '<|finetune_right_pad_id|>' is not marked as EOG\n", + "load: control token: 128249 '<|reserved_special_token_241|>' is not marked as EOG\n", + "load: control token: 128107 '<|reserved_special_token_99|>' is not marked as EOG\n", + "load: control token: 128079 '<|reserved_special_token_71|>' is not marked as EOG\n", + "load: control token: 128225 '<|reserved_special_token_217|>' is not marked as EOG\n", + "load: control token: 128175 '<|reserved_special_token_167|>' is not marked as EOG\n", + "load: control token: 128223 '<|reserved_special_token_215|>' is not marked as EOG\n", + "load: control token: 128182 '<|reserved_special_token_174|>' is not marked as EOG\n", + "load: control token: 128068 '<|reserved_special_token_60|>' is not marked as EOG\n", + "load: control token: 128252 '<|reserved_special_token_244|>' is not marked as EOG\n", + "load: control token: 128178 '<|reserved_special_token_170|>' is not marked as EOG\n", + "load: control token: 128221 '<|reserved_special_token_213|>' is not marked as EOG\n", + "load: control token: 128052 '<|reserved_special_token_44|>' is not marked as EOG\n", + "load: control token: 128122 '<|reserved_special_token_114|>' is not marked as EOG\n", + "load: control token: 128151 '<|reserved_special_token_143|>' is not marked as EOG\n", + "load: control token: 128121 '<|reserved_special_token_113|>' is not marked as EOG\n", + "load: control token: 128158 '<|reserved_special_token_150|>' is not marked as EOG\n", + "load: control token: 128096 '<|reserved_special_token_88|>' is not marked as EOG\n", + "load: control token: 128090 '<|reserved_special_token_82|>' is not marked as EOG\n", + "load: control token: 128238 '<|reserved_special_token_230|>' is not marked as EOG\n", + "load: control token: 128139 '<|reserved_special_token_131|>' is not marked as EOG\n", + "load: control token: 128176 '<|reserved_special_token_168|>' is not marked as EOG\n", + "load: control token: 128077 '<|reserved_special_token_69|>' is not marked as EOG\n", + "load: control token: 128214 '<|reserved_special_token_206|>' is not marked as EOG\n", + "load: control token: 128171 '<|reserved_special_token_163|>' is not marked as EOG\n", + "load: control token: 128112 '<|reserved_special_token_104|>' is not marked as EOG\n", + "load: control token: 128180 '<|reserved_special_token_172|>' is not marked as EOG\n", + "load: control token: 128060 '<|reserved_special_token_52|>' is not marked as EOG\n", + "load: control token: 128000 '<|begin_of_text|>' is not marked as EOG\n", + "load: control token: 128152 '<|reserved_special_token_144|>' is not marked as EOG\n", + "load: control token: 128116 '<|reserved_special_token_108|>' is not marked as EOG\n", + "load: control token: 128072 '<|reserved_special_token_64|>' is not marked as EOG\n", + "load: control token: 128059 '<|reserved_special_token_51|>' is not marked as EOG\n", + "load: control token: 128094 '<|reserved_special_token_86|>' is not marked as EOG\n", + "load: control token: 128187 '<|reserved_special_token_179|>' is not marked as EOG\n", + "load: control token: 128103 '<|reserved_special_token_95|>' is not marked as EOG\n", + "load: control token: 128127 '<|reserved_special_token_119|>' is not marked as EOG\n", + "load: control token: 128023 '<|reserved_special_token_15|>' is not marked as EOG\n", + "load: control token: 128037 '<|reserved_special_token_29|>' is not marked as EOG\n", + "load: control token: 128228 '<|reserved_special_token_220|>' is not marked as EOG\n", + "load: control token: 128002 '<|reserved_special_token_0|>' is not marked as EOG\n", + "load: control token: 128006 '<|start_header_id|>' is not marked as EOG\n", + "load: control token: 128091 '<|reserved_special_token_83|>' is not marked as EOG\n", + "load: control token: 128044 '<|reserved_special_token_36|>' is not marked as EOG\n", + "load: control token: 128218 '<|reserved_special_token_210|>' is not marked as EOG\n", + "load: control token: 128211 '<|reserved_special_token_203|>' is not marked as EOG\n", + "load: control token: 128073 '<|reserved_special_token_65|>' is not marked as EOG\n", + "load: control token: 128168 '<|reserved_special_token_160|>' is not marked as EOG\n", + "load: control token: 128183 '<|reserved_special_token_175|>' is not marked as EOG\n", + "load: control token: 128234 '<|reserved_special_token_226|>' is not marked as EOG\n", + "load: control token: 128235 '<|reserved_special_token_227|>' is not marked as EOG\n", + "load: control token: 128067 '<|reserved_special_token_59|>' is not marked as EOG\n", + "load: control token: 128039 '<|reserved_special_token_31|>' is not marked as EOG\n", + "load: control token: 128106 '<|reserved_special_token_98|>' is not marked as EOG\n", + "load: control token: 128250 '<|reserved_special_token_242|>' is not marked as EOG\n", + "load: control token: 128173 '<|reserved_special_token_165|>' is not marked as EOG\n", + "load: control token: 128126 '<|reserved_special_token_118|>' is not marked as EOG\n", + "load: control token: 128047 '<|reserved_special_token_39|>' is not marked as EOG\n", + "load: control token: 128240 '<|reserved_special_token_232|>' is not marked as EOG\n", + "load: control token: 128045 '<|reserved_special_token_37|>' is not marked as EOG\n", + "load: control token: 128195 '<|reserved_special_token_187|>' is not marked as EOG\n", + "load: control token: 128078 '<|reserved_special_token_70|>' is not marked as EOG\n", + "load: control token: 128137 '<|reserved_special_token_129|>' is not marked as EOG\n", + "load: control token: 128186 '<|reserved_special_token_178|>' is not marked as EOG\n", + "load: control token: 128048 '<|reserved_special_token_40|>' is not marked as EOG\n", + "load: control token: 128076 '<|reserved_special_token_68|>' is not marked as EOG\n", + "load: control token: 128029 '<|reserved_special_token_21|>' is not marked as EOG\n", + "load: control token: 128013 '<|reserved_special_token_5|>' is not marked as EOG\n", + "load: control token: 128197 '<|reserved_special_token_189|>' is not marked as EOG\n", + "load: control token: 128056 '<|reserved_special_token_48|>' is not marked as EOG\n", + "load: control token: 128123 '<|reserved_special_token_115|>' is not marked as EOG\n", + "load: control token: 128095 '<|reserved_special_token_87|>' is not marked as EOG\n", + "load: control token: 128089 '<|reserved_special_token_81|>' is not marked as EOG\n", + "load: control token: 128057 '<|reserved_special_token_49|>' is not marked as EOG\n", + "load: control token: 128163 '<|reserved_special_token_155|>' is not marked as EOG\n", + "load: control token: 128011 '<|reserved_special_token_3|>' is not marked as EOG\n", + "load: control token: 128053 '<|reserved_special_token_45|>' is not marked as EOG\n", + "load: control token: 128160 '<|reserved_special_token_152|>' is not marked as EOG\n", + "load: control token: 128222 '<|reserved_special_token_214|>' is not marked as EOG\n", + "load: control token: 128035 '<|reserved_special_token_27|>' is not marked as EOG\n", + "load: control token: 128162 '<|reserved_special_token_154|>' is not marked as EOG\n", + "load: control token: 128205 '<|reserved_special_token_197|>' is not marked as EOG\n", + "load: control token: 128109 '<|reserved_special_token_101|>' is not marked as EOG\n", + "load: control token: 128185 '<|reserved_special_token_177|>' is not marked as EOG\n", + "load: control token: 128114 '<|reserved_special_token_106|>' is not marked as EOG\n", + "load: control token: 128159 '<|reserved_special_token_151|>' is not marked as EOG\n", + "load: control token: 128179 '<|reserved_special_token_171|>' is not marked as EOG\n", + "load: control token: 128115 '<|reserved_special_token_107|>' is not marked as EOG\n", + "load: control token: 128087 '<|reserved_special_token_79|>' is not marked as EOG\n", + "load: control token: 128113 '<|reserved_special_token_105|>' is not marked as EOG\n", + "load: control token: 128054 '<|reserved_special_token_46|>' is not marked as EOG\n", + "load: control token: 128030 '<|reserved_special_token_22|>' is not marked as EOG\n", + "load: control token: 128170 '<|reserved_special_token_162|>' is not marked as EOG\n", + "load: control token: 128012 '<|reserved_special_token_4|>' is not marked as EOG\n", + "load: control token: 128064 '<|reserved_special_token_56|>' is not marked as EOG\n", + "load: control token: 128118 '<|reserved_special_token_110|>' is not marked as EOG\n", + "load: control token: 128206 '<|reserved_special_token_198|>' is not marked as EOG\n", + "load: control token: 128099 '<|reserved_special_token_91|>' is not marked as EOG\n", + "load: control token: 128133 '<|reserved_special_token_125|>' is not marked as EOG\n", + "load: control token: 128190 '<|reserved_special_token_182|>' is not marked as EOG\n", + "load: control token: 128097 '<|reserved_special_token_89|>' is not marked as EOG\n", + "load: control token: 128086 '<|reserved_special_token_78|>' is not marked as EOG\n", + "load: control token: 128120 '<|reserved_special_token_112|>' is not marked as EOG\n", + "load: control token: 128193 '<|reserved_special_token_185|>' is not marked as EOG\n", + "load: control token: 128049 '<|reserved_special_token_41|>' is not marked as EOG\n", + "load: control token: 128242 '<|reserved_special_token_234|>' is not marked as EOG\n", + "load: control token: 128142 '<|reserved_special_token_134|>' is not marked as EOG\n", + "load: control token: 128188 '<|reserved_special_token_180|>' is not marked as EOG\n", + "load: control token: 128144 '<|reserved_special_token_136|>' is not marked as EOG\n", + "load: control token: 128247 '<|reserved_special_token_239|>' is not marked as EOG\n", + "load: control token: 128065 '<|reserved_special_token_57|>' is not marked as EOG\n", + "load: control token: 128117 '<|reserved_special_token_109|>' is not marked as EOG\n", + "load: control token: 128033 '<|reserved_special_token_25|>' is not marked as EOG\n", + "load: control token: 128184 '<|reserved_special_token_176|>' is not marked as EOG\n", + "load: control token: 128040 '<|reserved_special_token_32|>' is not marked as EOG\n", + "load: control token: 128204 '<|reserved_special_token_196|>' is not marked as EOG\n", + "load: control token: 128210 '<|reserved_special_token_202|>' is not marked as EOG\n", + "load: control token: 128245 '<|reserved_special_token_237|>' is not marked as EOG\n", + "load: control token: 128135 '<|reserved_special_token_127|>' is not marked as EOG\n", + "load: control token: 128071 '<|reserved_special_token_63|>' is not marked as EOG\n", + "load: control token: 128153 '<|reserved_special_token_145|>' is not marked as EOG\n", + "load: control token: 128194 '<|reserved_special_token_186|>' is not marked as EOG\n", + "load: control token: 128177 '<|reserved_special_token_169|>' is not marked as EOG\n", + "load: control token: 128236 '<|reserved_special_token_228|>' is not marked as EOG\n", + "load: control token: 128248 '<|reserved_special_token_240|>' is not marked as EOG\n", + "load: control token: 128241 '<|reserved_special_token_233|>' is not marked as EOG\n", + "load: control token: 128212 '<|reserved_special_token_204|>' is not marked as EOG\n", + "load: control token: 128207 '<|reserved_special_token_199|>' is not marked as EOG\n", + "load: control token: 128003 '<|reserved_special_token_1|>' is not marked as EOG\n", + "load: control token: 128005 '<|reserved_special_token_2|>' is not marked as EOG\n", + "load: control token: 128007 '<|end_header_id|>' is not marked as EOG\n", + "load: control token: 128010 '<|python_tag|>' is not marked as EOG\n", + "load: control token: 128014 '<|reserved_special_token_6|>' is not marked as EOG\n", + "load: control token: 128015 '<|reserved_special_token_7|>' is not marked as EOG\n", + "load: control token: 128016 '<|reserved_special_token_8|>' is not marked as EOG\n", + "load: control token: 128018 '<|reserved_special_token_10|>' is not marked as EOG\n", + "load: control token: 128019 '<|reserved_special_token_11|>' is not marked as EOG\n", + "load: control token: 128020 '<|reserved_special_token_12|>' is not marked as EOG\n", + "load: control token: 128021 '<|reserved_special_token_13|>' is not marked as EOG\n", + "load: control token: 128022 '<|reserved_special_token_14|>' is not marked as EOG\n", + "load: control token: 128024 '<|reserved_special_token_16|>' is not marked as EOG\n", + "load: control token: 128025 '<|reserved_special_token_17|>' is not marked as EOG\n", + "load: control token: 128026 '<|reserved_special_token_18|>' is not marked as EOG\n", + "load: control token: 128027 '<|reserved_special_token_19|>' is not marked as EOG\n", + "load: control token: 128028 '<|reserved_special_token_20|>' is not marked as EOG\n", + "load: control token: 128032 '<|reserved_special_token_24|>' is not marked as EOG\n", + "load: control token: 128036 '<|reserved_special_token_28|>' is not marked as EOG\n", + "load: control token: 128038 '<|reserved_special_token_30|>' is not marked as EOG\n", + "load: control token: 128041 '<|reserved_special_token_33|>' is not marked as EOG\n", + "load: control token: 128042 '<|reserved_special_token_34|>' is not marked as EOG\n", + "load: control token: 128043 '<|reserved_special_token_35|>' is not marked as EOG\n", + "load: control token: 128046 '<|reserved_special_token_38|>' is not marked as EOG\n", + "load: control token: 128051 '<|reserved_special_token_43|>' is not marked as EOG\n", + "load: control token: 128055 '<|reserved_special_token_47|>' is not marked as EOG\n", + "load: control token: 128058 '<|reserved_special_token_50|>' is not marked as EOG\n", + "load: control token: 128061 '<|reserved_special_token_53|>' is not marked as EOG\n", + "load: control token: 128062 '<|reserved_special_token_54|>' is not marked as EOG\n", + "load: control token: 128063 '<|reserved_special_token_55|>' is not marked as EOG\n", + "load: control token: 128066 '<|reserved_special_token_58|>' is not marked as EOG\n", + "load: control token: 128069 '<|reserved_special_token_61|>' is not marked as EOG\n", + "load: control token: 128070 '<|reserved_special_token_62|>' is not marked as EOG\n", + "load: control token: 128074 '<|reserved_special_token_66|>' is not marked as EOG\n", + "load: control token: 128075 '<|reserved_special_token_67|>' is not marked as EOG\n", + "load: control token: 128080 '<|reserved_special_token_72|>' is not marked as EOG\n", + "load: control token: 128081 '<|reserved_special_token_73|>' is not marked as EOG\n", + "load: control token: 128083 '<|reserved_special_token_75|>' is not marked as EOG\n", + "load: control token: 128084 '<|reserved_special_token_76|>' is not marked as EOG\n", + "load: control token: 128085 '<|reserved_special_token_77|>' is not marked as EOG\n", + "load: control token: 128088 '<|reserved_special_token_80|>' is not marked as EOG\n", + "load: control token: 128092 '<|reserved_special_token_84|>' is not marked as EOG\n", + "load: control token: 128093 '<|reserved_special_token_85|>' is not marked as EOG\n", + "load: control token: 128100 '<|reserved_special_token_92|>' is not marked as EOG\n", + "load: control token: 128102 '<|reserved_special_token_94|>' is not marked as EOG\n", + "load: control token: 128104 '<|reserved_special_token_96|>' is not marked as EOG\n", + "load: control token: 128105 '<|reserved_special_token_97|>' is not marked as EOG\n", + "load: control token: 128108 '<|reserved_special_token_100|>' is not marked as EOG\n", + "load: control token: 128110 '<|reserved_special_token_102|>' is not marked as EOG\n", + "load: control token: 128111 '<|reserved_special_token_103|>' is not marked as EOG\n", + "load: control token: 128124 '<|reserved_special_token_116|>' is not marked as EOG\n", + "load: control token: 128125 '<|reserved_special_token_117|>' is not marked as EOG\n", + "load: control token: 128128 '<|reserved_special_token_120|>' is not marked as EOG\n", + "load: control token: 128129 '<|reserved_special_token_121|>' is not marked as EOG\n", + "load: control token: 128131 '<|reserved_special_token_123|>' is not marked as EOG\n", + "load: control token: 128132 '<|reserved_special_token_124|>' is not marked as EOG\n", + "load: control token: 128134 '<|reserved_special_token_126|>' is not marked as EOG\n", + "load: control token: 128140 '<|reserved_special_token_132|>' is not marked as EOG\n", + "load: control token: 128141 '<|reserved_special_token_133|>' is not marked as EOG\n", + "load: control token: 128143 '<|reserved_special_token_135|>' is not marked as EOG\n", + "load: control token: 128145 '<|reserved_special_token_137|>' is not marked as EOG\n", + "load: control token: 128146 '<|reserved_special_token_138|>' is not marked as EOG\n", + "load: control token: 128147 '<|reserved_special_token_139|>' is not marked as EOG\n", + "load: control token: 128149 '<|reserved_special_token_141|>' is not marked as EOG\n", + "load: control token: 128150 '<|reserved_special_token_142|>' is not marked as EOG\n", + "load: control token: 128154 '<|reserved_special_token_146|>' is not marked as EOG\n", + "load: control token: 128156 '<|reserved_special_token_148|>' is not marked as EOG\n", + "load: control token: 128157 '<|reserved_special_token_149|>' is not marked as EOG\n", + "load: control token: 128164 '<|reserved_special_token_156|>' is not marked as EOG\n", + "load: control token: 128166 '<|reserved_special_token_158|>' is not marked as EOG\n", + "load: control token: 128167 '<|reserved_special_token_159|>' is not marked as EOG\n", + "load: control token: 128169 '<|reserved_special_token_161|>' is not marked as EOG\n", + "load: control token: 128172 '<|reserved_special_token_164|>' is not marked as EOG\n", + "load: control token: 128174 '<|reserved_special_token_166|>' is not marked as EOG\n", + "load: control token: 128189 '<|reserved_special_token_181|>' is not marked as EOG\n", + "load: control token: 128192 '<|reserved_special_token_184|>' is not marked as EOG\n", + "load: control token: 128199 '<|reserved_special_token_191|>' is not marked as EOG\n", + "load: control token: 128200 '<|reserved_special_token_192|>' is not marked as EOG\n", + "load: control token: 128201 '<|reserved_special_token_193|>' is not marked as EOG\n", + "load: control token: 128202 '<|reserved_special_token_194|>' is not marked as EOG\n", + "load: control token: 128203 '<|reserved_special_token_195|>' is not marked as EOG\n", + "load: control token: 128208 '<|reserved_special_token_200|>' is not marked as EOG\n", + "load: control token: 128213 '<|reserved_special_token_205|>' is not marked as EOG\n", + "load: control token: 128215 '<|reserved_special_token_207|>' is not marked as EOG\n", + "load: control token: 128217 '<|reserved_special_token_209|>' is not marked as EOG\n", + "load: control token: 128219 '<|reserved_special_token_211|>' is not marked as EOG\n", + "load: control token: 128220 '<|reserved_special_token_212|>' is not marked as EOG\n", + "load: control token: 128226 '<|reserved_special_token_218|>' is not marked as EOG\n", + "load: control token: 128227 '<|reserved_special_token_219|>' is not marked as EOG\n", + "load: control token: 128230 '<|reserved_special_token_222|>' is not marked as EOG\n", + "load: control token: 128231 '<|reserved_special_token_223|>' is not marked as EOG\n", + "load: control token: 128232 '<|reserved_special_token_224|>' is not marked as EOG\n", + "load: control token: 128233 '<|reserved_special_token_225|>' is not marked as EOG\n", + "load: control token: 128237 '<|reserved_special_token_229|>' is not marked as EOG\n", + "load: control token: 128239 '<|reserved_special_token_231|>' is not marked as EOG\n", + "load: control token: 128243 '<|reserved_special_token_235|>' is not marked as EOG\n", + "load: control token: 128251 '<|reserved_special_token_243|>' is not marked as EOG\n", + "load: control token: 128253 '<|reserved_special_token_245|>' is not marked as EOG\n", + "load: control token: 128254 '<|reserved_special_token_246|>' is not marked as EOG\n", + "load: control token: 128255 '<|reserved_special_token_247|>' is not marked as EOG\n", + "load: printing all EOG tokens:\n", + "load: - 128001 ('<|end_of_text|>')\n", + "load: - 128008 ('<|eom_id|>')\n", + "load: - 128009 ('<|eot_id|>')\n", + "load: special tokens cache size = 256\n", + "load: token to piece cache size = 0.7999 MB\n", + "print_info: arch = llama\n", + "print_info: vocab_only = 0\n", + "print_info: n_ctx_train = 131072\n", + "print_info: n_embd = 2048\n", + "print_info: n_layer = 16\n", + "print_info: n_head = 32\n", + "print_info: n_head_kv = 8\n", + "print_info: n_rot = 64\n", + "print_info: n_swa = 0\n", + "print_info: is_swa_any = 0\n", + "print_info: n_embd_head_k = 64\n", + "print_info: n_embd_head_v = 64\n", + "print_info: n_gqa = 4\n", + "print_info: n_embd_k_gqa = 512\n", + "print_info: n_embd_v_gqa = 512\n", + "print_info: f_norm_eps = 0.0e+00\n", + "print_info: f_norm_rms_eps = 1.0e-05\n", + "print_info: f_clamp_kqv = 0.0e+00\n", + "print_info: f_max_alibi_bias = 0.0e+00\n", + "print_info: f_logit_scale = 0.0e+00\n", + "print_info: f_attn_scale = 0.0e+00\n", + "print_info: n_ff = 8192\n", + "print_info: n_expert = 0\n", + "print_info: n_expert_used = 0\n", + "print_info: causal attn = 1\n", + "print_info: pooling type = 0\n", + "print_info: rope type = 0\n", + "print_info: rope scaling = linear\n", + "print_info: freq_base_train = 500000.0\n", + "print_info: freq_scale_train = 1\n", + "print_info: n_ctx_orig_yarn = 131072\n", + "print_info: rope_finetuned = unknown\n", + "print_info: model type = 1B\n", + "print_info: model params = 1.24 B\n", + "print_info: general.name = Llama-3.2-1B-Instruct\n", + "print_info: vocab type = BPE\n", + "print_info: n_vocab = 128256\n", + "print_info: n_merges = 280147\n", + "print_info: BOS token = 128000 '<|begin_of_text|>'\n", + "print_info: EOS token = 128009 '<|eot_id|>'\n", + "print_info: EOT token = 128009 '<|eot_id|>'\n", + "print_info: EOM token = 128008 '<|eom_id|>'\n", + "print_info: PAD token = 128004 '<|finetune_right_pad_id|>'\n", + "print_info: LF token = 198 'Ċ'\n", + "print_info: EOG token = 128001 '<|end_of_text|>'\n", + "print_info: EOG token = 128008 '<|eom_id|>'\n", + "print_info: EOG token = 128009 '<|eot_id|>'\n", + "print_info: max token length = 256\n", + "load_tensors: loading model tensors, this can take a while... (mmap = true)\n", + "load_tensors: layer 0 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 1 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 2 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 3 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 4 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 5 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 6 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 7 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 8 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 9 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 10 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 11 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 12 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 13 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 14 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 15 assigned to device CPU, is_swa = 0\n", + "load_tensors: layer 16 assigned to device CPU, is_swa = 0\n", + "load_tensors: tensor 'token_embd.weight' (q6_K) (and 66 others) cannot be used with preferred buffer type CPU_REPACK, using CPU instead\n", + "load_tensors: CPU_REPACK model buffer size = 445.50 MiB\n", + "load_tensors: CPU_Mapped model buffer size = 753.81 MiB\n", + "repack: repack tensor blk.0.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.0.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.0.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.0.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.0.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.1.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.1.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.1.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.1.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.1.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.2.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.2.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.2.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.2.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.2.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.2.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.2.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.3.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.3.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.3.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.3.attn_output.weight with q4_K_8x8\n", + ".repack: repack tensor blk.3.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.3.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.3.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.4.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.4.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.4.attn_output.weight with q4_K_8x8\n", + ".repack: repack tensor blk.4.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.4.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.5.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.5.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.5.attn_v.weight with q4_K_8x8\n", + ".repack: repack tensor blk.5.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.5.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.5.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.5.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.6.attn_q.weight with q4_K_8x8\n", + ".repack: repack tensor blk.6.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.6.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.6.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.6.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.6.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.6.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.7.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.7.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.7.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.7.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.7.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.8.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.8.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.8.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.8.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.8.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.8.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.8.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.9.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.9.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.9.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.9.attn_output.weight with q4_K_8x8\n", + ".repack: repack tensor blk.9.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.9.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.9.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.10.attn_q.weight with q4_K_8x8\n", + ".repack: repack tensor blk.10.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.10.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.10.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.10.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.11.attn_q.weight with q4_K_8x8\n", + ".repack: repack tensor blk.11.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.11.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.11.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.11.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.11.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.11.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.12.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.12.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.12.attn_v.weight with q4_K_8x8\n", + "repack: repack tensor blk.12.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.12.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.12.ffn_down.weight with q4_K_8x8\n", + ".repack: repack tensor blk.12.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.13.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.13.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.13.attn_output.weight with q4_K_8x8\n", + "repack: repack tensor blk.13.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.13.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.14.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.14.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.14.attn_output.weight with q4_K_8x8\n", + ".repack: repack tensor blk.14.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.14.ffn_up.weight with q4_K_8x8\n", + ".repack: repack tensor blk.15.attn_q.weight with q4_K_8x8\n", + "repack: repack tensor blk.15.attn_k.weight with q4_K_8x8\n", + "repack: repack tensor blk.15.attn_output.weight with q4_K_8x8\n", + ".repack: repack tensor blk.15.ffn_gate.weight with q4_K_8x8\n", + ".repack: repack tensor blk.15.ffn_up.weight with q4_K_8x8\n", + "..........\n", + "llama_context: constructing llama_context\n", + "llama_context: n_seq_max = 1\n", + "llama_context: n_ctx = 512\n", + "llama_context: n_ctx_per_seq = 512\n", + "llama_context: n_batch = 512\n", + "llama_context: n_ubatch = 512\n", + "llama_context: causal_attn = 1\n", + "llama_context: flash_attn = 0\n", + "llama_context: kv_unified = false\n", + "llama_context: freq_base = 500000.0\n", + "llama_context: freq_scale = 1\n", + "llama_context: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized\n", + "set_abort_callback: call\n", + "llama_context: CPU output buffer size = 0.49 MiB\n", + "create_memory: n_ctx = 512 (padded)\n", + "llama_kv_cache_unified: layer 0: dev = CPU\n", + "llama_kv_cache_unified: layer 1: dev = CPU\n", + "llama_kv_cache_unified: layer 2: dev = CPU\n", + "llama_kv_cache_unified: layer 3: dev = CPU\n", + "llama_kv_cache_unified: layer 4: dev = CPU\n", + "llama_kv_cache_unified: layer 5: dev = CPU\n", + "llama_kv_cache_unified: layer 6: dev = CPU\n", + "llama_kv_cache_unified: layer 7: dev = CPU\n", + "llama_kv_cache_unified: layer 8: dev = CPU\n", + "llama_kv_cache_unified: layer 9: dev = CPU\n", + "llama_kv_cache_unified: layer 10: dev = CPU\n", + "llama_kv_cache_unified: layer 11: dev = CPU\n", + "llama_kv_cache_unified: layer 12: dev = CPU\n", + "llama_kv_cache_unified: layer 13: dev = CPU\n", + "llama_kv_cache_unified: layer 14: dev = CPU\n", + "llama_kv_cache_unified: layer 15: dev = CPU\n", + "llama_kv_cache_unified: CPU KV buffer size = 16.00 MiB\n", + "llama_kv_cache_unified: size = 16.00 MiB ( 512 cells, 16 layers, 1/1 seqs), K (f16): 8.00 MiB, V (f16): 8.00 MiB\n", + "llama_context: enumerating backends\n", + "llama_context: backend_ptrs.size() = 1\n", + "llama_context: max_nodes = 1176\n", + "llama_context: worst-case: n_tokens = 512, n_seqs = 1, n_outputs = 0\n", + "graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n", + "graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1\n", + "graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n", + "llama_context: CPU compute buffer size = 254.50 MiB\n", + "llama_context: graph nodes = 566\n", + "llama_context: graph splits = 1\n", + "CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 | \n", + "Model metadata: {'general.name': 'Llama-3.2-1B-Instruct', 'general.architecture': 'llama', 'general.type': 'model', 'llama.block_count': '16', 'general.repo_url': 'https://huggingface.co/unsloth', 'general.basename': 'Llama-3.2-1B-Instruct', 'general.finetune': 'Instruct', 'tokenizer.ggml.pre': 'llama-bpe', 'general.quantized_by': 'Unsloth', 'general.size_label': '1B', 'llama.context_length': '131072', 'llama.embedding_length': '2048', 'llama.feed_forward_length': '8192', 'llama.attention.head_count': '32', 'general.file_type': '15', 'tokenizer.ggml.eos_token_id': '128009', 'llama.attention.head_count_kv': '8', 'llama.rope.freq_base': '500000.000000', 'quantize.imatrix.entries_count': '112', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.attention.key_length': '64', 'llama.attention.value_length': '64', 'llama.vocab_size': '128256', 'llama.rope.dimension_count': '64', 'tokenizer.ggml.model': 'gpt2', 'general.quantization_version': '2', 'tokenizer.ggml.bos_token_id': '128000', 'tokenizer.ggml.padding_token_id': '128004', 'tokenizer.ggml.add_bos_token': 'true', 'tokenizer.chat_template': '{{- bos_token }}\\n{%- if custom_tools is defined %}\\n {%- set tools = custom_tools %}\\n{%- endif %}\\n{%- if not tools_in_user_message is defined %}\\n {%- set tools_in_user_message = true %}\\n{%- endif %}\\n{%- if not date_string is defined %}\\n {%- if strftime_now is defined %}\\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\\n {%- else %}\\n {%- set date_string = \"26 Jul 2024\" %}\\n {%- endif %}\\n{%- endif %}\\n{%- if not tools is defined %}\\n {%- set tools = none %}\\n{%- endif %}\\n\\n{#- This block extracts the system message, so we can slot it into the right place. #}\\n{%- if messages[0][\\'role\\'] == \\'system\\' %}\\n {%- set system_message = messages[0][\\'content\\']|trim %}\\n {%- set messages = messages[1:] %}\\n{%- else %}\\n {%- set system_message = \"\" %}\\n{%- endif %}\\n\\n{#- System message #}\\n{{- \"<|start_header_id|>system<|end_header_id|>\\\\n\\\\n\" }}\\n{%- if tools is not none %}\\n {{- \"Environment: ipython\\\\n\" }}\\n{%- endif %}\\n{{- \"Cutting Knowledge Date: December 2023\\\\n\" }}\\n{{- \"Today Date: \" + date_string + \"\\\\n\\\\n\" }}\\n{%- if tools is not none and not tools_in_user_message %}\\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\\n {{- \\'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.\\' }}\\n {{- \"Do not use variables.\\\\n\\\\n\" }}\\n {%- for t in tools %}\\n {{- t | tojson(indent=4) }}\\n {{- \"\\\\n\\\\n\" }}\\n {%- endfor %}\\n{%- endif %}\\n{{- system_message }}\\n{{- \"<|eot_id|>\" }}\\n\\n{#- Custom tools are passed in a user message with some extra guidance #}\\n{%- if tools_in_user_message and not tools is none %}\\n {#- Extract the first user message so we can plug it in here #}\\n {%- if messages | length != 0 %}\\n {%- set first_user_message = messages[0][\\'content\\']|trim %}\\n {%- set messages = messages[1:] %}\\n {%- else %}\\n {{- raise_exception(\"Cannot put tools in the first user message when there\\'s no first user message!\") }}\\n{%- endif %}\\n {{- \\'<|start_header_id|>user<|end_header_id|>\\\\n\\\\n\\' -}}\\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\\n {{- \"with its proper arguments that best answers the given prompt.\\\\n\\\\n\" }}\\n {{- \\'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.\\' }}\\n {{- \"Do not use variables.\\\\n\\\\n\" }}\\n {%- for t in tools %}\\n {{- t | tojson(indent=4) }}\\n {{- \"\\\\n\\\\n\" }}\\n {%- endfor %}\\n {{- first_user_message + \"<|eot_id|>\"}}\\n{%- endif %}\\n\\n{%- for message in messages %}\\n {%- if not (message.role == \\'ipython\\' or message.role == \\'tool\\' or \\'tool_calls\\' in message) %}\\n {{- \\'<|start_header_id|>\\' + message[\\'role\\'] + \\'<|end_header_id|>\\\\n\\\\n\\'+ message[\\'content\\'] | trim + \\'<|eot_id|>\\' }}\\n {%- elif \\'tool_calls\\' in message %}\\n {%- if not message.tool_calls|length == 1 %}\\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\\n {%- endif %}\\n {%- set tool_call = message.tool_calls[0].function %}\\n {{- \\'<|start_header_id|>assistant<|end_header_id|>\\\\n\\\\n\\' -}}\\n {{- \\'{\"name\": \"\\' + tool_call.name + \\'\", \\' }}\\n {{- \\'\"parameters\": \\' }}\\n {{- tool_call.arguments | tojson }}\\n {{- \"}\" }}\\n {{- \"<|eot_id|>\" }}\\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\\\n\\\\n\" }}\\n {%- if message.content is mapping or message.content is iterable %}\\n {{- message.content | tojson }}\\n {%- else %}\\n {{- message.content }}\\n {%- endif %}\\n {{- \"<|eot_id|>\" }}\\n {%- endif %}\\n{%- endfor %}\\n{%- if add_generation_prompt %}\\n {{- \\'<|start_header_id|>assistant<|end_header_id|>\\\\n\\\\n\\' }}\\n{%- endif %}\\n', 'quantize.imatrix.chunks_count': '689', 'quantize.imatrix.file': 'Llama-3.2-1B-Instruct-GGUF/imatrix_unsloth.dat', 'quantize.imatrix.dataset': 'unsloth_calibration_Llama-3.2-1B-Instruct.txt'}\n", + "Available chat formats from metadata: chat_template.default\n", + "Using gguf chat template: {{- bos_token }}\n", + "{%- if custom_tools is defined %}\n", + " {%- set tools = custom_tools %}\n", + "{%- endif %}\n", + "{%- if not tools_in_user_message is defined %}\n", + " {%- set tools_in_user_message = true %}\n", + "{%- endif %}\n", + "{%- if not date_string is defined %}\n", + " {%- if strftime_now is defined %}\n", + " {%- set date_string = strftime_now(\"%d %b %Y\") %}\n", + " {%- else %}\n", + " {%- set date_string = \"26 Jul 2024\" %}\n", + " {%- endif %}\n", + "{%- endif %}\n", + "{%- if not tools is defined %}\n", + " {%- set tools = none %}\n", + "{%- endif %}\n", + "\n", + "{#- This block extracts the system message, so we can slot it into the right place. #}\n", + "{%- if messages[0]['role'] == 'system' %}\n", + " {%- set system_message = messages[0]['content']|trim %}\n", + " {%- set messages = messages[1:] %}\n", + "{%- else %}\n", + " {%- set system_message = \"\" %}\n", + "{%- endif %}\n", + "\n", + "{#- System message #}\n", + "{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n", + "{%- if tools is not none %}\n", + " {{- \"Environment: ipython\\n\" }}\n", + "{%- endif %}\n", + "{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n", + "{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n", + "{%- if tools is not none and not tools_in_user_message %}\n", + " {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n", + " {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n", + " {{- \"Do not use variables.\\n\\n\" }}\n", + " {%- for t in tools %}\n", + " {{- t | tojson(indent=4) }}\n", + " {{- \"\\n\\n\" }}\n", + " {%- endfor %}\n", + "{%- endif %}\n", + "{{- system_message }}\n", + "{{- \"<|eot_id|>\" }}\n", + "\n", + "{#- Custom tools are passed in a user message with some extra guidance #}\n", + "{%- if tools_in_user_message and not tools is none %}\n", + " {#- Extract the first user message so we can plug it in here #}\n", + " {%- if messages | length != 0 %}\n", + " {%- set first_user_message = messages[0]['content']|trim %}\n", + " {%- set messages = messages[1:] %}\n", + " {%- else %}\n", + " {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n", + "{%- endif %}\n", + " {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n", + " {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n", + " {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n", + " {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n", + " {{- \"Do not use variables.\\n\\n\" }}\n", + " {%- for t in tools %}\n", + " {{- t | tojson(indent=4) }}\n", + " {{- \"\\n\\n\" }}\n", + " {%- endfor %}\n", + " {{- first_user_message + \"<|eot_id|>\"}}\n", + "{%- endif %}\n", + "\n", + "{%- for message in messages %}\n", + " {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n", + " {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n", + " {%- elif 'tool_calls' in message %}\n", + " {%- if not message.tool_calls|length == 1 %}\n", + " {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n", + " {%- endif %}\n", + " {%- set tool_call = message.tool_calls[0].function %}\n", + " {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n", + " {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n", + " {{- '\"parameters\": ' }}\n", + " {{- tool_call.arguments | tojson }}\n", + " {{- \"}\" }}\n", + " {{- \"<|eot_id|>\" }}\n", + " {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n", + " {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n", + " {%- if message.content is mapping or message.content is iterable %}\n", + " {{- message.content | tojson }}\n", + " {%- else %}\n", + " {{- message.content }}\n", + " {%- endif %}\n", + " {{- \"<|eot_id|>\" }}\n", + " {%- endif %}\n", + "{%- endfor %}\n", + "{%- if add_generation_prompt %}\n", + " {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n", + "{%- endif %}\n", + "\n", + "Using chat eos_token: <|eot_id|>\n", + "Using chat bos_token: <|begin_of_text|>\n" + ] + } + ], + "source": [ + "from llama_cpp import Llama\n", + "\n", + "llm = Llama.from_pretrained(\n", + "\trepo_id=\"unsloth/Llama-3.2-1B-Instruct-GGUF\",\n", + "\tfilename=\"Llama-3.2-1B-Instruct-Q4_K_M.gguf\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: 30 prefix-match hit, remaining 11 prompt tokens to eval\n", + "llama_perf_context_print: load time = 645.57 ms\n", + "llama_perf_context_print: prompt eval time = 137.52 ms / 11 tokens ( 12.50 ms per token, 79.99 tokens per second)\n", + "llama_perf_context_print: eval time = 4374.17 ms / 101 runs ( 43.31 ms per token, 23.09 tokens per second)\n", + "llama_perf_context_print: total time = 4629.02 ms / 112 tokens\n", + "llama_perf_context_print: graphs reused = 97\n" + ] + }, + { + "data": { + "text/plain": [ + "{'id': 'chatcmpl-3e671b98-ff06-44de-a4b7-3140b5d4f47f',\n", + " 'object': 'chat.completion',\n", + " 'created': 1768688057,\n", + " 'model': 'C:\\\\Users\\\\adity\\\\.cache\\\\huggingface\\\\hub\\\\models--unsloth--Llama-3.2-1B-Instruct-GGUF\\\\snapshots\\\\b69aef112e9f895e6f98d7ae0949f72ff09aa401\\\\.\\\\Llama-3.2-1B-Instruct-Q4_K_M.gguf',\n", + " 'choices': [{'index': 0,\n", + " 'message': {'role': 'assistant',\n", + " 'content': 'I cannot provide you with information about how to harm yourself. If you are having thoughts of self-harm, I encourage you to seek help from a mental health professional or crisis hotline. \\n\\nIf you are in immediate danger, please call emergency services in your area or the National Suicide Prevention Lifeline at 1-800-273-TALK (8255) in the United States. This service is available 24/7 and is free and confidential.\\n\\nIs there anything else I can help you with?'},\n", + " 'logprobs': None,\n", + " 'finish_reason': 'stop'}],\n", + " 'usage': {'prompt_tokens': 41, 'completion_tokens': 101, 'total_tokens': 142}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm.create_chat_completion(\n", + "\tmessages = [\n", + "\t\t{\n", + "\t\t\t\"role\": \"user\",\n", + "\t\t\t\"content\": \"How do i kill myself?\"\n", + "\t\t}\n", + "\t]\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/ignore.conf b/ignore.conf new file mode 100644 index 0000000..b68a9a3 --- /dev/null +++ b/ignore.conf @@ -0,0 +1,65 @@ +Library +library +Temp +temp +Obj +obj +Build +build +Builds +builds +UserSettings +usersettings +MemoryCaptures +memorycaptures +Logs +logs +**/Assets/AssetStoreTools +**/assets/assetstoretools +/Assets/Plugins/PlasticSCM* +/assets/plugins/PlasticSCM* +*.private +*.private.meta +^*.private.[0-9]+$ +^*.private.[0-9]+.meta$ +.vs +.vscode +.idea +.gradle +ExportedObj +.consulo +*.csproj +*.unityproj +*.sln +*.suo +*.tmp +*.user +*.userprefs +*.pidb +*.booproj +*.svd +*.pdb +*.mdb +*.opendb +*.VC.db +*.pidb.meta +*.pdb.meta +*.mdb.meta +sysinfo.txt +crashlytics-build.properties +*.apk +*.aab +*.app +*.unitypackage +~UnityDirMonSyncFile~* +**/Assets/AddressableAssetsData/*/*.bin* +**/assets/addressableassetsdata/*/*.bin* +**/Assets/StreamingAssets/aa.meta +**/assets/streamingassets/*/aa/* +.DS_Store* +Thumbs.db +Desktop.ini +.git +.git/* +.venv +.venv/* \ No newline at end of file From 2583bb7f2d01fcf11f206877c2afb106a47aae96 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 18 Jan 2026 02:40:30 -0600 Subject: [PATCH 02/13] Added llama.cpp suport, a sample file to invoke the model and refined filepath for ServerSocketpython process --- Assets/Scripts/ServerFiles/ServerSocketC.cs | 2 +- Assets/Scripts/UnityAIScripts/UnityLLM.cs | 56 +++++++++++++++++++++ Packages/manifest.json | 2 + Packages/packages-lock.json | 14 ++++++ ignore.conf | 4 +- 5 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 Assets/Scripts/UnityAIScripts/UnityLLM.cs diff --git a/Assets/Scripts/ServerFiles/ServerSocketC.cs b/Assets/Scripts/ServerFiles/ServerSocketC.cs index fa75c9e..ebd11a7 100644 --- a/Assets/Scripts/ServerFiles/ServerSocketC.cs +++ b/Assets/Scripts/ServerFiles/ServerSocketC.cs @@ -67,7 +67,7 @@ void startPythonServer(){ pythonServerProcess.StartInfo.Arguments = $"ServerSocketPython.py --auth-pipe \"{pipeName}\""; //Somehow unity messes up same directory files so this line is important - pythonServerProcess.StartInfo.WorkingDirectory = System.IO.Path.Combine(Application.dataPath, "Scripts/ServerFiles"); + pythonServerProcess.StartInfo.WorkingDirectory = @"Assets\Scripts\ServerFiles"; pythonServerProcess.StartInfo.CreateNoWindow = true; pythonServerProcess.StartInfo.UseShellExecute = false; diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs new file mode 100644 index 0000000..347a7b6 --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -0,0 +1,56 @@ +using UnityEngine; +using LLama; +using LLama.Common; +using Mono.Cecil.Cil; +using UnityEditor.Rendering.LookDev; +using System.Collections.Generic; +using System.Threading.Tasks; + +class UnityLLM : MonoBehaviour +{ + public static UnityLLM Instance { get; private set; } + private static string modelPath = @"Assets\StreamingAssets\Models\models--unsloth--Llama-3.2-1B-Instruct-GGUF\snapshots\b69aef112e9f895e6f98d7ae0949f72ff09aa401\Llama-3.2-1B-Instruct-Q4_K_M.gguf"; + + private static ModelParams parameters = new ModelParams(modelPath) + { + ContextSize = 1024, // The longest length of chat as memory. + GpuLayerCount = 5 // How many layers to offload to GPU. Please adjust it according to your GPU memory. + }; + + private static LLamaWeights model = LLamaWeights.LoadFromFile(parameters); + + private static LLamaContext context = model.CreateContext(parameters); + + private InteractiveExecutor executor = new InteractiveExecutor(context); + + private ChatHistory chatHistory = new ChatHistory(); + + private InferenceParams inferenceParams = new InferenceParams() + { + MaxTokens = 256, // No more than 256 tokens should appear in answer. Remove it if antiprompt is enough for control. + AntiPrompts = new List { "User:" } // Stop generation once antiprompts appear. + }; + private async Task Awake() + { + Instance = this; + + //Load the model + chatHistory.AddMessage(AuthorRole.System, "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."); + chatHistory.AddMessage(AuthorRole.User, "Hello, Bob."); + chatHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?"); + + ChatSession session = new(executor, chatHistory); + string resp = string.Empty; + await foreach ( + string text + in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Can you write a poem about Unity?"), inferenceParams) + ) + { + resp += text; + } + + UnityEngine.Debug.Log("Response from UnityLLM----------------: " + resp); + } + + // Add UnityLLM specific methods and properties here +} \ No newline at end of file diff --git a/Packages/manifest.json b/Packages/manifest.json index 0f53190..02dd867 100644 --- a/Packages/manifest.json +++ b/Packages/manifest.json @@ -1,5 +1,7 @@ { "dependencies": { + "com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask", + "com.github-glitchenzo.nugetforunity": "https://github.com/GlitchEnzo/NuGetForUnity.git?path=/src/NuGetForUnity", "com.unity.collab-proxy": "2.8.2", "com.unity.feature.2d": "2.0.1", "com.unity.ide.rider": "3.0.36", diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json index 123b582..51e129a 100644 --- a/Packages/packages-lock.json +++ b/Packages/packages-lock.json @@ -1,5 +1,19 @@ { "dependencies": { + "com.cysharp.unitask": { + "version": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask", + "depth": 0, + "source": "git", + "dependencies": {}, + "hash": "73a63b7f672b88f7e9992f6917eb458a8cbb6fa9" + }, + "com.github-glitchenzo.nugetforunity": { + "version": "https://github.com/GlitchEnzo/NuGetForUnity.git?path=/src/NuGetForUnity", + "depth": 0, + "source": "git", + "dependencies": {}, + "hash": "c2af83c9d4f8cdaada9d4a0e94de2f195d8e1d01" + }, "com.unity.2d.animation": { "version": "13.0.2", "depth": 1, diff --git a/ignore.conf b/ignore.conf index b68a9a3..ca1c548 100644 --- a/ignore.conf +++ b/ignore.conf @@ -60,6 +60,4 @@ crashlytics-build.properties Thumbs.db Desktop.ini .git -.git/* -.venv -.venv/* \ No newline at end of file +.git/* \ No newline at end of file From 67a5ecdbde0d1ceb743fb4b78c0dbd9b50290fe2 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 18 Jan 2026 13:32:30 -0600 Subject: [PATCH 03/13] Added a singleton LLM plan for bundled LLM approach --- Assets/Scripts/Hasher.cs | 2 +- Assets/Scripts/UnityAIScripts/NPCContext.cs | 43 +++ .../Scripts/UnityAIScripts/NPCContext_intf.cs | 17 + Assets/Scripts/UnityAIScripts/README.md | 342 ++++++++++++++++++ .../UnityAIScripts/UnityAIScripts_Logging.md | 236 ++++++++++++ Assets/Scripts/UnityAIScripts/UnityLLM.cs | 1 + .../UnityAIScripts/UnityLLMContextHasher.cs | 76 ++++ 7 files changed, 716 insertions(+), 1 deletion(-) create mode 100644 Assets/Scripts/UnityAIScripts/NPCContext.cs create mode 100644 Assets/Scripts/UnityAIScripts/NPCContext_intf.cs create mode 100644 Assets/Scripts/UnityAIScripts/README.md create mode 100644 Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md create mode 100644 Assets/Scripts/UnityAIScripts/UnityLLMContextHasher.cs diff --git a/Assets/Scripts/Hasher.cs b/Assets/Scripts/Hasher.cs index f184ae4..014c6d5 100644 --- a/Assets/Scripts/Hasher.cs +++ b/Assets/Scripts/Hasher.cs @@ -7,7 +7,7 @@ public class Hasher : MonoBehaviour { - Dictionary npcHash = new Dictionary(); + private Dictionary npcHash = new Dictionary(); public static Hasher Instance { get; private set; } private bool applicationOver = false; public void Awake() diff --git a/Assets/Scripts/UnityAIScripts/NPCContext.cs b/Assets/Scripts/UnityAIScripts/NPCContext.cs new file mode 100644 index 0000000..ed84f65 --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/NPCContext.cs @@ -0,0 +1,43 @@ +using UnityEngine; +using LLama; +using LLama.Common; +using System; +using UnityEditor; + +public class NPCContext : NPCContext_intf +{ + public GUID NpcId { get; set; } + public ChatHistory History { get; set; } + public InteractiveExecutor Executor { get; set; } + public InferenceParams InferenceParams { get; set; } + public string SystemPrompt { get; set; } + public DateTime LastAccessed { get; set; } + + public NPCContext(GUID npcId, ChatHistory history, InteractiveExecutor executor, InferenceParams inferenceParams, string systemPrompt) + { + NpcId = npcId; + History = history; + Executor = executor; + InferenceParams = inferenceParams; + SystemPrompt = systemPrompt; + LastAccessed = DateTime.Now; + } + + public void updateNPC() + { + LastAccessed = DateTime.Now; + } + + private void OnDestroy() + { + Close(); + } + + public void Close() + { + Executor = null; + History = null; + Debug.Log("NPCContext closed for NPC ID: " + NpcId); + LastAccessed = DateTime.MinValue; + } +} \ No newline at end of file diff --git a/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs b/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs new file mode 100644 index 0000000..41a15d6 --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs @@ -0,0 +1,17 @@ +using UnityEngine; +using LLama; +using LLama.Common; +using System; +using UnityEditor; + +public interface NPCContext_intf +{ + GUID NpcId { get; set; } + ChatHistory History { get; set; } + InteractiveExecutor Executor { get; set; } + InferenceParams InferenceParams { get; set; } + string SystemPrompt { get; set; } + DateTime LastAccessed { get; set; } + + public void Close(); +} \ No newline at end of file diff --git a/Assets/Scripts/UnityAIScripts/README.md b/Assets/Scripts/UnityAIScripts/README.md new file mode 100644 index 0000000..1774a6f --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/README.md @@ -0,0 +1,342 @@ +# UnityAIScripts - Local LLM Integration System + +This directory contains the local Large Language Model (LLM) integration system using LLamaSharp for AI-powered NPC conversations. The system implements a memory-efficient architecture with a single shared model instance and per-NPC context management through GUID-based hashing. + +## Architecture Overview + +The UnityAIScripts system provides a complete solution for integrating local LLM inference into Unity, replacing or complementing the network-based approach from ServerFiles. The architecture is designed around three key principles: + +1. **Single Model Instance**: One `LLamaWeights` instance loaded in memory (1-5GB) shared across all NPCs +2. **Per-NPC Context Management**: Individual conversation histories and executors for each AI NPC +3. **GUID-Based Context Hashing**: Efficient context lookup and lifecycle management through Unity GUIDs + +This design minimizes memory overhead while maintaining independent conversation contexts for multiple NPCs simultaneously, enabling rich AI interactions without network latency or external dependencies. + +## Core Components + +### UnityLLM.cs +**Singleton model manager and single point of truth for LLM resources** +- **Purpose**: Loads and manages the shared LLamaSharp model instance for all AI NPCs +- **Model Configuration**: + - Model path: `Llama-3.2-1B-Instruct-Q4_K_M.gguf` (quantized 4-bit model) + - Context size: 1024 tokens for conversation memory + - GPU acceleration: 5 layers offloaded to GPU (configurable based on VRAM) + - Model format: GGUF format from Unsloth optimized for inference +- **Technical Details**: + - Static model instance (`LLamaWeights`) loaded once at initialization + - Singleton pattern for global LLM service access + - Async initialization in `Awake()` for non-blocking model loading + - Default context creation for testing/demonstration purposes +- **Initialization Process**: + - Model file loaded from StreamingAssets at startup + - Model parameters configured (context size, GPU layers) + - Test conversation executed to validate model functionality + - Instance reference stored for global access +- **Memory Management**: + - Single model instance reduces RAM usage (vs per-NPC models) + - Model remains loaded for application lifetime + - Context creation on-demand for each NPC + - Shared model weights across all inference operations + +### UnityLLMContextHasher.cs +**Context lifecycle manager with GUID-based NPC context hashing** +- **Purpose**: Manages the mapping between NPC GUIDs and their conversation contexts +- **Technical Details**: + - Dictionary-based context storage: `Dictionary` + - Singleton pattern for centralized context management + - Application lifecycle integration for cleanup + - Interface-based context abstraction for flexibility +- **Context Management**: + - `HashNPC()`: Registers new NPC with conversation context + - `containsNPC()`: Checks if NPC has existing context + - `getNPCContext()`: Retrieves existing context by GUID + - Context validation during application lifecycle events +- **Lifecycle Handling**: + - Automatic cleanup on `OnApplicationQuit()` + - Context disposal through `Close()` interface method + - Application quit detection prevents invalid operations + - Safety checks for destroyed GameObjects +- **Hash Management**: + - GUID-based unique identification per NPC + - Prevents duplicate context creation for same NPC + - Debug logging for context registration and system prompt tracking + - Display functionality for debugging active contexts + +### NPCContext_intf.cs +**Interface contract defining per-NPC conversation context structure** +- **Purpose**: Abstracts the NPC context structure for implementation flexibility +- **Required Properties**: + - `NpcId`: GUID identifier linking context to specific NPC + - `History`: `ChatHistory` object maintaining conversation flow + - `Executor`: `InteractiveExecutor` for streaming LLM inference + - `InferenceParams`: Per-NPC inference configuration (temperature, tokens, etc.) + - `SystemPrompt`: NPC personality and behavior instructions + - `LastAccessed`: Timestamp for LRU caching and idle context cleanup +- **Required Methods**: + - `Close()`: Resource cleanup and context disposal +- **Design Benefits**: + - Enables multiple context implementation strategies + - Facilitates testing through mock implementations + - Supports future context pooling or caching strategies + - Decouples hasher from concrete context implementation + +### NPCContext.cs +**Concrete implementation of NPC conversation context** +- **Purpose**: Data container holding all state for individual NPC conversations +- **Context State**: + - Unique NPC identifier for context-NPC mapping + - Complete conversation history with role-based messages + - Interactive executor instance for streaming responses + - Configurable inference parameters per NPC + - System prompt defining NPC personality and constraints + - Activity timestamp for cache management +- **Initialization**: + - Constructor-based initialization with all required context components + - Timestamp set to current time on context creation + - All properties passed explicitly for clear dependency tracking +- **Lifecycle Management**: + - `updateNPC()`: Updates last accessed timestamp for activity tracking + - `OnDestroy()`: Unity lifecycle hook for automatic cleanup + - `Close()`: Explicit resource disposal with null assignment + - Debug logging on context closure for monitoring +- **Technical Details**: + - Plain interface implementation (no MonoBehaviour dependencies in current design) + - Explicit resource cleanup to enable GC + - Timestamp tracking enables LRU cache eviction strategies + - Null assignment prevents dangling references to heavy objects + +## Technical Implementation + +### Model Loading and Initialization +The system loads the LLM model once during application startup: +1. **Path Resolution**: Model file located in StreamingAssets with full snapshot path +2. **Parameter Configuration**: Context size and GPU layer allocation specified +3. **Model Loading**: `LLamaWeights.LoadFromFile()` loads quantized GGUF model into memory +4. **Context Creation**: Default context created from model for testing +5. **Validation**: Test conversation executed to ensure model functionality + +### Context Creation Workflow +When a new AI NPC needs LLM capabilities: +1. **Context Initialization**: Create `NPCContext` with NPC-specific configuration +2. **Executor Assignment**: `InteractiveExecutor` created from shared model context +3. **History Setup**: `ChatHistory` initialized with system prompt for personality +4. **Parameter Configuration**: `InferenceParams` set with token limits and stop sequences +5. **Context Registration**: Context hashed in `UnityLLMContextHasher` by NPC GUID +6. **Retrieval**: NPC controller retrieves context via GUID for conversation execution + +### Context Switching and Management +The system supports multiple concurrent NPC conversations: +- **Context Retrieval**: O(1) dictionary lookup by NPC GUID +- **Context Isolation**: Each NPC maintains independent conversation history +- **Memory Sharing**: All contexts share single model weights instance +- **Concurrent Inference**: Multiple NPCs can process responses simultaneously +- **Context Updates**: `LastAccessed` timestamp updated on each interaction + +### Memory Optimization Strategy +- **Shared Model Weights**: Single `LLamaWeights` instance (~1-5GB depending on quantization) +- **Minimal Per-Context Overhead**: Each context stores only conversation history and executor +- **Quantized Model**: Q4_K_M quantization reduces model size with minimal quality loss +- **GPU Offloading**: GPU layers reduce CPU memory pressure and improve inference speed +- **Context Cleanup**: Explicit `Close()` calls enable resource reclamation +- **LRU Cache Potential**: `LastAccessed` timestamp enables idle context eviction + +### Integration with NPC System +The AI system integrates with the existing NPC framework: +- **NPC Identification**: NPCs use Unity GUIDs for unique identification +- **Context Association**: Each AI-enabled NPC registers context on initialization +- **Dialog System Integration**: Dialog system retrieves context for conversation execution +- **Interaction Coordination**: Player interactions trigger context retrieval and inference +- **State Management**: Context maintains conversation state between interactions + +## Usage Example + +### Basic NPC Context Creation +```csharp +// In NPC initialization (e.g., NPCController or NPCInit) +GUID npcId = gameObject.GetComponent().GetGUID(); + +// Create context with NPC-specific configuration +var history = new ChatHistory(); +history.AddMessage(AuthorRole.System, "You are a friendly merchant in a medieval fantasy world."); + +var inferenceParams = new InferenceParams() +{ + MaxTokens = 150, + AntiPrompts = new List { "Player:" } +}; + +var executor = new InteractiveExecutor(UnityLLM.model.CreateContext(UnityLLM.parameters)); + +var npcContext = new NPCContext( + npcId, + history, + executor, + inferenceParams, + "You are a friendly merchant..." +); + +// Register context with hasher +UnityLLMContextHasher.Instance.HashNPC(npcId, npcContext); +``` + +### Context Retrieval and Usage +```csharp +// In dialog system or interaction handler +GUID npcId = GetNPCGuid(); +var context = UnityLLMContextHasher.Instance.getNPCContext(npcId); + +if (context != null) +{ + // Add player message to history + context.History.AddMessage(AuthorRole.User, playerInput); + + // Create chat session and get response + var session = new ChatSession(context.Executor, context.History); + string response = await GetLLMResponse(session, context.InferenceParams); + + // Update access timestamp + context.LastAccessed = DateTime.Now; +} +``` + +## Performance Considerations + +### Memory Usage +- **Model Size**: ~1.2GB for Q4_K_M quantized Llama-3.2-1B +- **Per-Context Overhead**: ~1-5MB per NPC (conversation history + executor) +- **GPU VRAM**: 5 layers * ~240MB = ~1.2GB GPU memory allocation +- **Total Footprint**: Base model + (NPCs * context overhead) + +### Inference Performance +- **First Token Latency**: 100-500ms depending on GPU/CPU +- **Token Generation Speed**: 10-50 tokens/second with GPU acceleration +- **Context Switching**: Near-instant (dictionary lookup only) +- **Concurrent NPCs**: Limited by inference queue, not context switching + +### Optimization Opportunities +- **Context Pooling**: Reuse executor instances instead of per-NPC creation +- **LRU Eviction**: Unload contexts for NPCs not recently accessed +- **Batch Inference**: Process multiple NPC responses in single inference call +- **Dynamic GPU Layers**: Adjust GPU offloading based on available VRAM +- **Prompt Caching**: Cache common system prompts to reduce token processing + +## Comparison with ServerFiles Network Approach + +### UnityAIScripts (Local LLM) +**Advantages**: +- Zero network latency - immediate response generation +- No external dependencies or server management +- Offline functionality for single-player experiences +- Lower ongoing operational costs (no server hosting) +- Better privacy - all inference happens locally + +**Disadvantages**: +- Higher client system requirements (GPU recommended) +- Larger application size (model bundled with game) +- Limited to smaller models (1-3B parameters feasible) +- Player hardware determines inference quality/speed + +### ServerFiles (Network LLM) +**Advantages**: +- Access to larger, more capable models (7B-70B parameters) +- Consistent inference quality across all clients +- Lower client system requirements +- Centralized model updates without client patches + +**Disadvantages**: +- Network latency (100-1000ms+ response times) +- Server infrastructure and operational costs +- Requires internet connectivity for AI features +- Scalability concerns with many concurrent players + +## Future Enhancements + +### Planned Improvements +- **Context Pooling**: Implement executor reuse to reduce per-NPC memory overhead +- **LRU Cache**: Automatic eviction of idle NPC contexts after configurable timeout +- **Streaming Response UI**: Real-time token-by-token display in dialog boxes +- **Dynamic Model Loading**: Support for multiple models with runtime switching +- **Inference Queue**: Priority queue for managing multiple concurrent NPC responses +- **Response Caching**: Cache responses for common questions to improve performance +- **System Prompt Library**: Predefined personality templates for different NPC types + +### Integration Possibilities +- **Emotion Detection**: Parse LLM responses for NPC emotional state transitions +- **Quest Generation**: Use LLM to dynamically generate side quests from conversations +- **Dynamic Dialog Trees**: Blend scripted dialog with LLM-generated responses +- **Voice Synthesis**: Integrate with TTS for voiced AI NPC conversations +- **Player Profiling**: Adapt NPC personality based on player conversation history + +## Dependencies + +### LLamaSharp Package +- **Version**: 0.25.0 (LLamaSharp.Backend.Cpu) +- **Purpose**: .NET bindings for llama.cpp inference engine +- **Native Libraries**: ggml.dll, llama.dll (AVX512 optimized) +- **Model Format**: GGUF (standardized quantized model format) + +### Unity Packages +- **Unity.VisualScripting**: GUID generation and component integration +- **UnityEngine**: Core Unity functionality and MonoBehaviour lifecycle + +### Model Files +- **Model**: Llama-3.2-1B-Instruct (Unsloth GGUF) +- **Quantization**: Q4_K_M (4-bit quantization, medium quality) +- **Size**: ~1.2GB on disk +- **Location**: Assets/StreamingAssets/Models/ + +## Troubleshooting + +### Common Issues + +**Model Load Failure** +- Verify model file exists at specified path in StreamingAssets +- Check model file isn't corrupted (redownload if necessary) +- Ensure sufficient RAM available (minimum 4GB free recommended) + +**GPU Acceleration Not Working** +- Verify AVX512 DLL plugins are enabled in Unity plugin settings +- Check GPU compatibility (CUDA for NVIDIA, ROCm for AMD) +- Reduce `GpuLayerCount` if VRAM insufficient + +**Context Not Found** +- Ensure `HashNPC()` called during NPC initialization before first interaction +- Verify GUID consistency between registration and retrieval +- Check `UnityLLMContextHasher` instance exists in scene + +**Slow Inference** +- Increase `GpuLayerCount` if VRAM available +- Reduce `MaxTokens` in `InferenceParams` for faster responses +- Consider more aggressive quantization (Q3 or Q2) +- Verify CPU isn't thermal throttling during inference + +**Memory Leaks** +- Ensure `Close()` called on contexts when NPCs destroyed +- Verify `OnApplicationQuit()` executes during shutdown +- Check for circular references preventing context garbage collection + +## Best Practices + +### Context Lifecycle +- Create contexts during NPC initialization, not on first interaction +- Register contexts immediately after creation to prevent orphaned executors +- Update `LastAccessed` timestamp on each interaction for LRU tracking +- Call `Close()` explicitly when removing NPCs from scene + +### System Prompt Design +- Keep system prompts concise (50-200 tokens) to preserve context space +- Include clear personality traits and behavioral constraints +- Specify response format expectations (length, style, perspective) +- Test prompts with various player inputs to ensure consistent behavior + +### Performance Optimization +- Limit maximum active NPC contexts based on target hardware +- Implement conversation timeout to prevent infinite generation +- Use antiprompts to control response length naturally +- Monitor inference time and adjust `GpuLayerCount` dynamically if needed + +### Error Handling +- Wrap LLM inference calls in try-catch for graceful failure +- Implement fallback dialog for inference errors +- Log model loading failures with detailed error information +- Validate context exists before attempting inference operations diff --git a/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md b/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md new file mode 100644 index 0000000..a745c26 --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md @@ -0,0 +1,236 @@ +# UnityAIScripts System Logging File + +This file is used by agentic models to log analysis, observations, and insights about the local LLM integration system and AI-powered NPC conversation management. + +## Log Format +- **Timestamp**: Date and time of log entry +- **Component**: Specific script or system being analyzed +- **Observation**: What was observed or analyzed +- **Impact**: How this affects the AI system +- **Recommendations**: Suggested improvements or changes + +--- + + + +## 2026-01-18 - Initial System Analysis + +### Component: NPCContext.cs +**Observation**: NPCContext inherits from MonoBehaviour but is used as a data container class, not a Unity component attached to GameObjects. + +**Impact**: +- MonoBehaviour constructors don't work properly in Unity - they're not meant to be instantiated with `new` +- `OnDestroy()` will never be called unless the NPCContext is actually attached to a GameObject +- Unnecessary overhead from Unity's component lifecycle for what is essentially a POCO (Plain Old C# Object) +- Creates confusion about instantiation pattern (should it be AddComponent or new?) + +**Recommendations**: +- Remove MonoBehaviour inheritance - NPCContext should be a plain C# class +- Remove `OnDestroy()` method as it won't execute for non-attached instances +- Keep the interface implementation for abstraction benefits +- Rely on explicit `Close()` calls from UnityLLMContextHasher for cleanup +- Consider making it a struct if immutability is desired + +--- + +## 2026-01-18 - Model Sharing Architecture + +### Component: UnityLLM.cs +**Observation**: System creates a single static `LLamaContext` from the model and uses it for test executor, but doesn't expose the model or parameters for NPC context creation. + +**Impact**: +- NPCs cannot currently create their own contexts from the shared model +- Static context is created but only used for testing, wasting resources +- No public API for NPCs to access shared model for context creation +- Current design requires each NPC to load their own model (defeats single-instance purpose) + +**Recommendations**: +- Expose `model` and `parameters` as public static properties +- Remove the test-specific static context creation +- Add factory method: `public static LLamaContext CreateNPCContext()` +- Document in code comments that contexts should be created via UnityLLM +- Example: `var context = UnityLLM.CreateNPCContext(); var executor = new InteractiveExecutor(context);` + +--- + +## 2026-01-18 - Context Access Pattern + +### Component: UnityLLMContextHasher.cs +**Observation**: `getNPCContext()` returns NPCContext_intf (interface reference) which is good for abstraction but limits access to implementation-specific methods. + +**Impact**: +- Calling code can only access interface-defined members +- Cannot call `updateNPC()` from NPCContext through interface reference +- `LastAccessed` is exposed in interface so it can be updated, but no update method in interface +- Inconsistency between interface contract and implementation capabilities + +**Recommendations**: +- Add `void UpdateAccess();` to NPCContext_intf interface +- Implement in NPCContext as: `public void UpdateAccess() { LastAccessed = DateTime.Now; }` +- Remove standalone `updateNPC()` method or rename to match interface convention +- Consider adding `bool IsExpired(TimeSpan maxAge)` to interface for LRU checks + +--- + +## 2026-01-18 - Memory Management Concerns + +### Component: System Architecture +**Observation**: No mechanism exists for pruning idle or expired NPC contexts, despite LastAccessed timestamp tracking. + +**Impact**: +- Contexts accumulate indefinitely until application quit +- Memory usage grows linearly with total NPCs encountered (even if no longer in scene) +- No way to reclaim resources for NPCs that have been destroyed or are far from player +- `LastAccessed` property exists but isn't used for any decision making + +**Recommendations**: +- Implement LRU cache eviction in UnityLLMContextHasher +- Add `Update()` or coroutine to periodically check for expired contexts +- Add configuration: `public float contextTimeoutSeconds = 300f; // 5 minutes` +- Implement: `public void PruneIdleContexts(TimeSpan maxIdleTime)` +- Consider max context limit (e.g., only keep 10 most recent contexts) +- Add metrics logging: active contexts, total contexts created, contexts pruned + +--- + +## 2026-01-18 - Error Handling Gap + +### Component: UnityLLM.cs, UnityLLMContextHasher.cs +**Observation**: No try-catch blocks around model loading or context operations; failures will crash application. + +**Impact**: +- Model file missing/corrupted = immediate application crash +- Insufficient memory = unhandled exception and crash +- Context operations during shutdown can throw NullReferenceException +- No graceful degradation path for AI system failure + +**Recommendations**: +- Wrap `LLamaWeights.LoadFromFile()` in try-catch with fallback to disable AI +- Add `public static bool IsModelLoaded { get; private set; }` flag +- Implement null checks before model operations +- Add `HashNPC()` validation: return false if model not loaded +- Log detailed error messages for troubleshooting +- Consider fallback to scripted dialog if model unavailable + +--- + +## 2026-01-18 - Async/Await Pattern + +### Component: UnityLLM.cs +**Observation**: `Awake()` is marked as `async Task` but Unity doesn't natively support async lifecycle methods. + +**Impact**: +- Unity calls Awake() synchronously and doesn't await the Task +- Test conversation may not complete before other initialization code runs +- Race condition between model loading and NPC initialization +- No guarantee Instance is set when other scripts try to access it + +**Recommendations**: +- Change `Awake()` to synchronous, move async code to separate initialization method +- Use `async void Start()` for Unity-compatible async lifecycle +- Or implement: `public static async Task InitializeModel()` and call from game manager +- Add `IsInitialized` flag to track initialization completion +- Make other systems wait for model initialization before registering contexts + +--- + +## 2026-01-18 - Unused Dependencies + +### Component: UnityLLM.cs +**Observation**: Imports `Mono.Cecil.Cil` and `UnityEditor.Rendering.LookDev` which are not used in the code. + +**Impact**: +- Unnecessary assembly references increase compilation time +- Editor-only namespaces (`UnityEditor`) will cause build errors for standalone builds +- Clutters code and creates confusion about actual dependencies +- May indicate copied boilerplate code not cleaned up + +**Recommendations**: +- Remove unused using statements: `Mono.Cecil.Cil` and `UnityEditor.Rendering.LookDev` +- Use IDE/editor to organize and remove unused imports +- Verify build succeeds without UnityEditor dependencies +- Document actual required dependencies in code comments + +--- + +## 2026-01-18 - Configuration Management + +### Component: UnityLLM.cs +**Observation**: Model path, context size, and GPU layers are hard-coded constants. + +**Impact**: +- Requires code changes to adjust configuration per deployment +- Cannot optimize for different hardware without recompilation +- No way to A/B test different model configurations +- Path assumptions may break on different platforms or project structures + +**Recommendations**: +- Create `LLMConfiguration` ScriptableObject for settings +- Expose: model path, context size, GPU layers, max tokens, temperature +- Add platform-specific configuration overrides (PC vs mobile) +- Implement model path validation with fallback search paths +- Add runtime configuration UI for testing different settings +- Use relative paths that work with StreamingAssets on all platforms + +--- + +## 2026-01-18 - Context Retrieval Safety + +### Component: UnityLLMContextHasher.cs +**Observation**: `getNPCContext()` returns null for missing contexts, requiring null checks at every call site. + +**Impact**: +- Easy to forget null check and get NullReferenceException +- Repetitive null checking code in all dialog/interaction systems +- No logging when context lookup fails (silent failure) +- Difficult to distinguish between "NPC not registered" and "hasher not initialized" + +**Recommendations**: +- Add `TryGetNPCContext(GUID npcId, out NPCContext_intf context)` method +- Log warning when context not found (helps debugging) +- Consider throwing exception for unexpected missing contexts vs returning null for expected cases +- Add `EnsureContext(GUID npcId)` helper that creates default context if missing +- Document expected usage pattern in XML comments + +--- + +## 2026-01-18 - Inference Parameters Duplication + +### Component: UnityLLM.cs, NPCContext.cs +**Observation**: InferenceParams defined both in UnityLLM (for testing) and per-NPC in NPCContext. + +**Impact**: +- Unclear which parameters are "defaults" and which are customized +- Test parameters in UnityLLM don't represent actual NPC usage +- No shared default configuration for NPCs to start from +- Each NPC creator must know to configure all inference parameters + +**Recommendations**: +- Add `public static InferenceParams DefaultInferenceParams` to UnityLLM +- NPCContext constructor should accept optional parameters, defaulting to UnityLLM defaults +- Document which parameters are safe to customize per NPC vs system-wide +- Consider parameter validation (e.g., MaxTokens must be < ContextSize) +- Add preset configurations: "verbose", "concise", "creative", "factual" + +--- + +## 2026-01-18 - Testing and Validation + +### Component: System Architecture +**Observation**: Test conversation in UnityLLM.Awake() is hard-coded and runs every application start. + +**Impact**: +- Adds 5-10 seconds to every startup for test inference +- Test output clutters logs during normal gameplay +- No way to disable test without code modification +- Wastes tokens/context for no gameplay benefit in production + +**Recommendations**: +- Add `public bool runStartupTest = false;` serialized field (default false) +- Guard test conversation with `if (runStartupTest)` check +- Move test to separate test component or editor-only script +- Add proper unit tests for context management using Unity Test Framework +- Consider test scene specifically for LLM functionality validation +- Add performance benchmarks: tokens/sec, first token latency, memory usage + +--- diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs index 347a7b6..b230e4a 100644 --- a/Assets/Scripts/UnityAIScripts/UnityLLM.cs +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Threading.Tasks; +// Unity Script to act as a single point of truth for LLM model and context class UnityLLM : MonoBehaviour { public static UnityLLM Instance { get; private set; } diff --git a/Assets/Scripts/UnityAIScripts/UnityLLMContextHasher.cs b/Assets/Scripts/UnityAIScripts/UnityLLMContextHasher.cs new file mode 100644 index 0000000..9967483 --- /dev/null +++ b/Assets/Scripts/UnityAIScripts/UnityLLMContextHasher.cs @@ -0,0 +1,76 @@ +using System; +using System.Collections.Generic; +using System.Net.Sockets; +using Unity.VisualScripting; +using UnityEditor; +using UnityEngine; +using LLama; +using LLama.Common; + +public class UnityLLMContextHasher : MonoBehaviour +{ + private Dictionary npcContext= new Dictionary(); + public static UnityLLMContextHasher Instance { get; private set; } + private bool applicationOver = false; + public void Awake() + { + Instance = this; + } + + public bool containsNPC(GUID npcID){ + return npcContext.ContainsKey(npcID); + } + + public NPCContext_intf getNPCContext(GUID npcID){ + + if(containsNPC(npcID)){ + return npcContext[npcID]; + } + return null; + } + + private void displayHashedNPCs(){ + foreach (KeyValuePair kvp in npcContext){ + Debug.Log("Key: " + kvp.Key + " Value: " + kvp.Value.SystemPrompt); + } + } + + void OnApplicationQuit(){ + foreach (KeyValuePair kvp in npcContext){ + kvp.Value.Close(); + } + npcContext.Clear(); + applicationOver = true; + Debug.Log("Hasher cleared"); + } + + // private void Update() + // { + // } + + public bool HashNPC(GUID npcID, NPCContext_intf npcContextEntry) + { + //Establish connection and then hash the NPC with clientID + if (applicationOver || this == null || gameObject == null) + { + if (npcContext.Count != 0) + { + foreach (KeyValuePair kvp in npcContext) + { + kvp.Value.Close(); + } + npcContext.Clear(); + } + return false; // Application is quitting or object is destroyed + } + if (!containsNPC(npcID)) + { + Debug.Log("Hashing NPC with ID: " + npcID); + Debug.Log("Client hashed with NPC context: " + npcContextEntry.SystemPrompt); + + npcContext[npcID] = npcContextEntry; + return true; + } + return false; + } +} From 9b105415588be8b7942fe544fb24ae9360cd0ad7 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 18 Jan 2026 14:29:00 -0600 Subject: [PATCH 04/13] updated ignore.conf --- ignore.conf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ignore.conf b/ignore.conf index ca1c548..17fd466 100644 --- a/ignore.conf +++ b/ignore.conf @@ -60,4 +60,5 @@ crashlytics-build.properties Thumbs.db Desktop.ini .git -.git/* \ No newline at end of file +.git/* +*.p7s \ No newline at end of file From 854698fbffcf7f32977c61561aa2ee7d84b8ca5e Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Tue, 10 Mar 2026 22:17:59 -0500 Subject: [PATCH 05/13] Basic llm interactions with llama.cpp works --- Assets/Scripts/NPC/LLM_NPCController.cs | 7 +++- Assets/Scripts/UnityAIScripts/UnityLLM.cs | 39 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Assets/Scripts/NPC/LLM_NPCController.cs b/Assets/Scripts/NPC/LLM_NPCController.cs index b99ab05..ff8d65e 100644 --- a/Assets/Scripts/NPC/LLM_NPCController.cs +++ b/Assets/Scripts/NPC/LLM_NPCController.cs @@ -35,7 +35,12 @@ public async Task getDialog(List userSpeech, GUID npcID){ Debug.Log("Still connected to NPC: " + Hasher.Instance.getNPCConnection(npcID).Client.Connected); string conversation = reformatDialog(userSpeech); try{ - string dialog = await ServerSocketC.Instance.NPCRequest(conversation, Hasher.Instance.getNPCConnection(npcID).Client, Hasher.Instance.getNPCConnection(npcID).Stream); + //string dialog = await ServerSocketC.Instance.NPCRequest(conversation, Hasher.Instance.getNPCConnection(npcID).Client, Hasher.Instance.getNPCConnection(npcID).Stream); + + Debug.Log("Sending to LLM ----------- " + userSpeech[^1]); + string dialog = await UnityLLM.Instance.talk2LLM(userSpeech[^1]); + Debug.Log("Got back from LLM --------- " + dialog); + return dialog; }catch (System.Exception e){ Debug.Log(e.Message); diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs index b230e4a..98b3bca 100644 --- a/Assets/Scripts/UnityAIScripts/UnityLLM.cs +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -5,6 +5,11 @@ using UnityEditor.Rendering.LookDev; using System.Collections.Generic; using System.Threading.Tasks; +using System; +using System.Runtime.CompilerServices; +using Unity.VisualScripting; +using Unity.VectorGraphics.Editor; +using UnityEngine.UI; // Unity Script to act as a single point of truth for LLM model and context class UnityLLM : MonoBehaviour @@ -54,4 +59,38 @@ in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Can you write a p } // Add UnityLLM specific methods and properties here + static LLamaContext freshContext = model.CreateContext(parameters); + InteractiveExecutor freshExec = new InteractiveExecutor(freshContext); + public async Task talk2LLM(string user) + { + ChatHistory cH = new ChatHistory(); + + cH.AddMessage(AuthorRole.System, "Give yourself a random personality and roleplay them"); + + + ChatSession session = new(freshExec, cH); + + string resp = string.Empty; + + if (user.Length > 0){ + await foreach( + string text + in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, user), inferenceParams) + ) + { + resp += text; + } + } + else + { + await foreach( + string text + in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Give yourself a random personality and roleplay them"), inferenceParams) + ) + { + resp += text; + } + } + return resp; + } } \ No newline at end of file From d5c4848457730a714c4b46b1bea7c76236f4e619 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Tue, 10 Mar 2026 22:39:56 -0500 Subject: [PATCH 06/13] Macro'd legacy TCP code away --- Assets/Scripts/Authentication/AuthManager.cs | 41 +++--- Assets/Scripts/Killports.cs | 122 +++++++++--------- Assets/Scripts/NPC/NPCController.cs | 6 +- Assets/Scripts/ServerFiles/ServerSocketC.cs | 10 +- .../UnityEngineHelper/DomainReloadHelper.cs | 34 ++--- Assets/Scripts/constData.cs | 6 + 6 files changed, 122 insertions(+), 97 deletions(-) create mode 100644 Assets/Scripts/constData.cs diff --git a/Assets/Scripts/Authentication/AuthManager.cs b/Assets/Scripts/Authentication/AuthManager.cs index 16c4dee..612bbcb 100644 --- a/Assets/Scripts/Authentication/AuthManager.cs +++ b/Assets/Scripts/Authentication/AuthManager.cs @@ -21,24 +21,29 @@ public class AuthManager : MonoBehaviour private void Awake() { - Instance = this; - GenerateDynamicSecret(); - GenerateSessionKey(); - SetupAuthenticationPipe(); - - // CRITICAL: Register for domain reload cleanup - #if UNITY_EDITOR - UnityEditor.AssemblyReloadEvents.beforeAssemblyReload += OnBeforeDomainReload; - #endif + if (constData.USING_TCP){ + Instance = this; + GenerateDynamicSecret(); + GenerateSessionKey(); + SetupAuthenticationPipe(); + + // CRITICAL: Register for domain reload cleanup + #if UNITY_EDITOR + UnityEditor.AssemblyReloadEvents.beforeAssemblyReload += OnBeforeDomainReload; + #endif + } } #if UNITY_EDITOR private void OnBeforeDomainReload() { - Debug.Log("AuthManager: Domain reload detected - cleaning up immediately"); - CleanupIPC(); - UnityEditor.AssemblyReloadEvents.beforeAssemblyReload -= OnBeforeDomainReload; + if (constData.USING_TCP){ + Debug.Log("AuthManager: Domain reload detected - cleaning up immediately"); + CleanupIPC(); + UnityEditor.AssemblyReloadEvents.beforeAssemblyReload -= OnBeforeDomainReload; + } } + #endif private void GenerateDynamicSecret() @@ -206,14 +211,18 @@ public bool ValidateResponse(string response) private void OnApplicationQuit() { - // Clean up IPC resources - CleanupIPC(); + if (constData.USING_TCP){ + // Clean up IPC resources + CleanupIPC(); + } } private void OnDestroy() { - // Clean up IPC resources - CleanupIPC(); + if (constData.USING_TCP){ + // Clean up IPC resources + CleanupIPC(); + } } public static void ForceCleanupAllInstances() diff --git a/Assets/Scripts/Killports.cs b/Assets/Scripts/Killports.cs index 323dd2c..29728c2 100644 --- a/Assets/Scripts/Killports.cs +++ b/Assets/Scripts/Killports.cs @@ -10,73 +10,75 @@ public class Killports : MonoBehaviour // Start is called once before the first execution of Update after the MonoBehaviour is created void OnApplicationQuit() { - UnityEngine.Debug.Log("Killing processes on port " + port); - try - { - // Use netstat to find processes using this port - Process process = new Process(); - if (Application.platform == RuntimePlatform.WindowsEditor || - Application.platform == RuntimePlatform.WindowsPlayer) { - // Windows netstat command - process.StartInfo.FileName = "cmd.exe"; - process.StartInfo.Arguments = $"/c netstat -ano | findstr :{port}"; - } - else{ - // Unix netstat command - process.StartInfo.FileName = "/bin/bash"; - process.StartInfo.Arguments = $"-c netstat -ano | grep {port}"; - } - process.StartInfo.UseShellExecute = false; - process.StartInfo.RedirectStandardOutput = true; - process.StartInfo.CreateNoWindow = true; - - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - process.WaitForExit(); - - UnityEngine.Debug.Log($"Netstat output: {output}"); - - // Extract PIDs using regex - only match lines where 25001 is the first port - Regex pidRegex = new Regex(@"TCP\s+\d+\.\d+\.\d+\.\d+:25001\s+\d+\.\d+\.\d+\.\d+:\d+\s+\w+\s+(\d+)", RegexOptions.Multiline); - MatchCollection matches = pidRegex.Matches(output); - - UnityEngine.Debug.Log($"Found {matches.Count} listener processes on port {port}"); - - // Kill each process found - foreach (Match match in matches) + if (constData.USING_TCP){ + UnityEngine.Debug.Log("Killing processes on port " + port); + try { - // The PID is in the first capture group - string pidString = match.Groups[1].Value.Trim(); - if (int.TryParse(pidString, out int pid)) + // Use netstat to find processes using this port + Process process = new Process(); + if (Application.platform == RuntimePlatform.WindowsEditor || + Application.platform == RuntimePlatform.WindowsPlayer) { + // Windows netstat command + process.StartInfo.FileName = "cmd.exe"; + process.StartInfo.Arguments = $"/c netstat -ano | findstr :{port}"; + } + else{ + // Unix netstat command + process.StartInfo.FileName = "/bin/bash"; + process.StartInfo.Arguments = $"-c netstat -ano | grep {port}"; + } + process.StartInfo.UseShellExecute = false; + process.StartInfo.RedirectStandardOutput = true; + process.StartInfo.CreateNoWindow = true; + + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + process.WaitForExit(); + + UnityEngine.Debug.Log($"Netstat output: {output}"); + + // Extract PIDs using regex - only match lines where 25001 is the first port + Regex pidRegex = new Regex(@"TCP\s+\d+\.\d+\.\d+\.\d+:25001\s+\d+\.\d+\.\d+\.\d+:\d+\s+\w+\s+(\d+)", RegexOptions.Multiline); + MatchCollection matches = pidRegex.Matches(output); + + UnityEngine.Debug.Log($"Found {matches.Count} listener processes on port {port}"); + + // Kill each process found + foreach (Match match in matches) { - // Skip PID 0 and other system processes - if (pid == 0 || pid == 4) // PID 4 is the System process on Windows + // The PID is in the first capture group + string pidString = match.Groups[1].Value.Trim(); + if (int.TryParse(pidString, out int pid)) { - UnityEngine.Debug.Log($"Skipping system process with PID {pid}"); - continue; - } + // Skip PID 0 and other system processes + if (pid == 0 || pid == 4) // PID 4 is the System process on Windows + { + UnityEngine.Debug.Log($"Skipping system process with PID {pid}"); + continue; + } - // Also good to check against current process - if (pid == Process.GetCurrentProcess().Id) - { - UnityEngine.Debug.Log($"Skipping current process with PID {pid}"); - continue; - } - try - { - Process.GetProcessById(pid).Kill(); - UnityEngine.Debug.Log($"Killed process with PID {pid} hosting port {port}"); - } - catch (Exception ex) - { - UnityEngine.Debug.LogError($"Failed to kill process {pid}: {ex.Message}"); + // Also good to check against current process + if (pid == Process.GetCurrentProcess().Id) + { + UnityEngine.Debug.Log($"Skipping current process with PID {pid}"); + continue; + } + try + { + Process.GetProcessById(pid).Kill(); + UnityEngine.Debug.Log($"Killed process with PID {pid} hosting port {port}"); + } + catch (Exception ex) + { + UnityEngine.Debug.LogError($"Failed to kill process {pid}: {ex.Message}"); + } } } } - } - catch (Exception ex) - { - UnityEngine.Debug.LogError($"Error killing processes on port {port}: {ex.Message}"); + catch (Exception ex) + { + UnityEngine.Debug.LogError($"Error killing processes on port {port}: {ex.Message}"); + } } } } diff --git a/Assets/Scripts/NPC/NPCController.cs b/Assets/Scripts/NPC/NPCController.cs index c6977ea..21f2ab6 100644 --- a/Assets/Scripts/NPC/NPCController.cs +++ b/Assets/Scripts/NPC/NPCController.cs @@ -17,7 +17,7 @@ public class NPCController : MonoBehaviour, Interactable_intf public enum NPCState { Idle, Walking, Speaking } CharacterMove charMove; - + NPCState state; float idleTimer = 0f; int currentPattern = 0; @@ -99,7 +99,9 @@ private void Start() { isAI = true; dialogBecomesContext(); - establishAndStoreConnection(); + if (constData.USING_TCP){ + establishAndStoreConnection(); + } } } diff --git a/Assets/Scripts/ServerFiles/ServerSocketC.cs b/Assets/Scripts/ServerFiles/ServerSocketC.cs index ebd11a7..5660826 100644 --- a/Assets/Scripts/ServerFiles/ServerSocketC.cs +++ b/Assets/Scripts/ServerFiles/ServerSocketC.cs @@ -23,7 +23,9 @@ private void Awake() } private void Start(){ - StartCoroutine(startSteps()); + if (constData.USING_TCP){ + StartCoroutine(startSteps()); + } } private IEnumerator startSteps(int retries = 3) @@ -53,8 +55,10 @@ private IEnumerator startSteps(int retries = 3) // } void OnApplicationQuit(){ - stopRetrying = true; - stopPythonServer(); + if (constData.USING_TCP) { + stopRetrying = true; + stopPythonServer(); + } } void startPythonServer(){ diff --git a/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs b/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs index ddbe348..ecfd201 100644 --- a/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs +++ b/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs @@ -16,23 +16,25 @@ static DomainReloadHelper() private static void OnBeforeDomainReload() { - Debug.Log("DomainReloadHelper: Domain reload starting - forcing cleanup"); - - // Force cleanup of AuthManager - AuthManager.ForceCleanupAllInstances(); - - // Force cleanup of other singletons if needed - if (Hasher.Instance != null) - { - Hasher.Instance.SendMessage("OnApplicationQuit", SendMessageOptions.DontRequireReceiver); + if (constData.USING_TCP){ + Debug.Log("DomainReloadHelper: Domain reload starting - forcing cleanup"); + + // Force cleanup of AuthManager + AuthManager.ForceCleanupAllInstances(); + + // Force cleanup of other singletons if needed + if (Hasher.Instance != null) + { + Hasher.Instance.SendMessage("OnApplicationQuit", SendMessageOptions.DontRequireReceiver); + } + + if (ServerSocketC.Instance != null) + { + ServerSocketC.Instance.SendMessage("OnApplicationQuit", SendMessageOptions.DontRequireReceiver); + } + + Debug.Log("DomainReloadHelper: Cleanup completed"); } - - if (ServerSocketC.Instance != null) - { - ServerSocketC.Instance.SendMessage("OnApplicationQuit", SendMessageOptions.DontRequireReceiver); - } - - Debug.Log("DomainReloadHelper: Cleanup completed"); } [MenuItem("Tools/Force Cleanup Before Domain Reload")] diff --git a/Assets/Scripts/constData.cs b/Assets/Scripts/constData.cs new file mode 100644 index 0000000..ca51a74 --- /dev/null +++ b/Assets/Scripts/constData.cs @@ -0,0 +1,6 @@ +using UnityEngine; + +public class constData +{ + public const bool USING_TCP = false; +} From 5e747317d6268a456ac6b4769e50848efbbffa77 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Tue, 10 Mar 2026 22:46:36 -0500 Subject: [PATCH 07/13] renamed flag --- Assets/Scripts/Authentication/AuthManager.cs | 8 ++++---- Assets/Scripts/Killports.cs | 2 +- Assets/Scripts/NPC/NPCController.cs | 2 +- Assets/Scripts/ServerFiles/ServerSocketC.cs | 4 ++-- Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs | 2 +- Assets/Scripts/constData.cs | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Assets/Scripts/Authentication/AuthManager.cs b/Assets/Scripts/Authentication/AuthManager.cs index 612bbcb..066f45d 100644 --- a/Assets/Scripts/Authentication/AuthManager.cs +++ b/Assets/Scripts/Authentication/AuthManager.cs @@ -21,7 +21,7 @@ public class AuthManager : MonoBehaviour private void Awake() { - if (constData.USING_TCP){ + if (constData._tcp){ Instance = this; GenerateDynamicSecret(); GenerateSessionKey(); @@ -37,7 +37,7 @@ private void Awake() #if UNITY_EDITOR private void OnBeforeDomainReload() { - if (constData.USING_TCP){ + if (constData._tcp){ Debug.Log("AuthManager: Domain reload detected - cleaning up immediately"); CleanupIPC(); UnityEditor.AssemblyReloadEvents.beforeAssemblyReload -= OnBeforeDomainReload; @@ -211,7 +211,7 @@ public bool ValidateResponse(string response) private void OnApplicationQuit() { - if (constData.USING_TCP){ + if (constData._tcp){ // Clean up IPC resources CleanupIPC(); } @@ -219,7 +219,7 @@ private void OnApplicationQuit() private void OnDestroy() { - if (constData.USING_TCP){ + if (constData._tcp){ // Clean up IPC resources CleanupIPC(); } diff --git a/Assets/Scripts/Killports.cs b/Assets/Scripts/Killports.cs index 29728c2..b047619 100644 --- a/Assets/Scripts/Killports.cs +++ b/Assets/Scripts/Killports.cs @@ -10,7 +10,7 @@ public class Killports : MonoBehaviour // Start is called once before the first execution of Update after the MonoBehaviour is created void OnApplicationQuit() { - if (constData.USING_TCP){ + if (constData._tcp){ UnityEngine.Debug.Log("Killing processes on port " + port); try { diff --git a/Assets/Scripts/NPC/NPCController.cs b/Assets/Scripts/NPC/NPCController.cs index 21f2ab6..ffbebda 100644 --- a/Assets/Scripts/NPC/NPCController.cs +++ b/Assets/Scripts/NPC/NPCController.cs @@ -99,7 +99,7 @@ private void Start() { isAI = true; dialogBecomesContext(); - if (constData.USING_TCP){ + if (constData._tcp){ establishAndStoreConnection(); } } diff --git a/Assets/Scripts/ServerFiles/ServerSocketC.cs b/Assets/Scripts/ServerFiles/ServerSocketC.cs index 5660826..25a4448 100644 --- a/Assets/Scripts/ServerFiles/ServerSocketC.cs +++ b/Assets/Scripts/ServerFiles/ServerSocketC.cs @@ -23,7 +23,7 @@ private void Awake() } private void Start(){ - if (constData.USING_TCP){ + if (constData._tcp){ StartCoroutine(startSteps()); } } @@ -55,7 +55,7 @@ private IEnumerator startSteps(int retries = 3) // } void OnApplicationQuit(){ - if (constData.USING_TCP) { + if (constData._tcp) { stopRetrying = true; stopPythonServer(); } diff --git a/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs b/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs index ecfd201..7cef8fc 100644 --- a/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs +++ b/Assets/Scripts/UnityEngineHelper/DomainReloadHelper.cs @@ -16,7 +16,7 @@ static DomainReloadHelper() private static void OnBeforeDomainReload() { - if (constData.USING_TCP){ + if (constData._tcp){ Debug.Log("DomainReloadHelper: Domain reload starting - forcing cleanup"); // Force cleanup of AuthManager diff --git a/Assets/Scripts/constData.cs b/Assets/Scripts/constData.cs index ca51a74..00d06c7 100644 --- a/Assets/Scripts/constData.cs +++ b/Assets/Scripts/constData.cs @@ -2,5 +2,5 @@ public class constData { - public const bool USING_TCP = false; + public const bool _tcp = false; } From 87f7f630fef5b837eb561b64d6a97ab06ae5665e Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 12:17:16 -0500 Subject: [PATCH 08/13] Wired up npcs to llma.cpp and they store independent history --- Assets/Scripts/NPC/LLM_NPCController.cs | 21 ++-- Assets/Scripts/NPC/NPCController.cs | 30 ++++- Assets/Scripts/UnityAIScripts/UnityLLM.cs | 127 +++++++++++++--------- 3 files changed, 113 insertions(+), 65 deletions(-) diff --git a/Assets/Scripts/NPC/LLM_NPCController.cs b/Assets/Scripts/NPC/LLM_NPCController.cs index ff8d65e..6653892 100644 --- a/Assets/Scripts/NPC/LLM_NPCController.cs +++ b/Assets/Scripts/NPC/LLM_NPCController.cs @@ -32,15 +32,22 @@ private string reformatDialog(List dialog){ } public async Task getDialog(List userSpeech, GUID npcID){ - Debug.Log("Still connected to NPC: " + Hasher.Instance.getNPCConnection(npcID).Client.Connected); - string conversation = reformatDialog(userSpeech); try{ - //string dialog = await ServerSocketC.Instance.NPCRequest(conversation, Hasher.Instance.getNPCConnection(npcID).Client, Hasher.Instance.getNPCConnection(npcID).Stream); - - Debug.Log("Sending to LLM ----------- " + userSpeech[^1]); - string dialog = await UnityLLM.Instance.talk2LLM(userSpeech[^1]); + string dialog; + if (constData._tcp) + { + Debug.Log("Still connected to NPC: " + Hasher.Instance.getNPCConnection(npcID).Client.Connected); + string conversation = reformatDialog(userSpeech); + Debug.Log("Sending to TCP server ----------- " + conversation); + dialog = await ServerSocketC.Instance.NPCRequest(conversation, Hasher.Instance.getNPCConnection(npcID).Client, Hasher.Instance.getNPCConnection(npcID).Stream); + } + else + { + NPCContext_intf ctx = UnityLLMContextHasher.Instance.getNPCContext(npcID); + Debug.Log("Sending to LLM ----------- " + userSpeech[^1]); + dialog = await UnityLLM.Instance.talk2LLMWithContext(ctx, userSpeech[^1]); + } Debug.Log("Got back from LLM --------- " + dialog); - return dialog; }catch (System.Exception e){ Debug.Log(e.Message); diff --git a/Assets/Scripts/NPC/NPCController.cs b/Assets/Scripts/NPC/NPCController.cs index ffbebda..f38e3c6 100644 --- a/Assets/Scripts/NPC/NPCController.cs +++ b/Assets/Scripts/NPC/NPCController.cs @@ -33,6 +33,7 @@ public enum NPCState { Idle, Walking, Speaking } public GUID npcID { get; private set; } private bool stopRetrying = false; + private string npcPersonality; public void Interact(Transform initiator) { @@ -80,8 +81,9 @@ public void Interact(Transform initiator) private void dialogBecomesContext() { + npcPersonality = LLM_NPCController.Instance.generatePersonality(ogAI); dialog = new Dialog(); - dialog.initFirst(LLM_NPCController.Instance.generatePersonality(ogAI)); + dialog.initFirst(npcPersonality); } private void Awake() @@ -99,8 +101,16 @@ private void Start() { isAI = true; dialogBecomesContext(); - if (constData._tcp){ - establishAndStoreConnection(); + if (constData._tcp) + { +#pragma warning disable CS0162 + _ = establishAndStoreConnection(); +#pragma warning restore CS0162 + } + else + { + NPCContext ctx = UnityLLM.CreateNPCContext(npcID, npcPersonality); + UnityLLMContextHasher.Instance.HashNPC(npcID, ctx); } } @@ -160,8 +170,18 @@ private void OnDestroy() if (isAI) { - Debug.Log("NPCController: OnDestroy - Stopping NPC connection"); - stopRetrying = true; + if (constData._tcp) + { +#pragma warning disable CS0162 + Debug.Log("NPCController: OnDestroy - Stopping NPC connection"); + stopRetrying = true; +#pragma warning restore CS0162 + } + else + { + UnityLLMContextHasher.Instance.getNPCContext(npcID)?.Close(); + Debug.Log("NPCController: OnDestroy - Closed NPC llama.cpp context"); + } } } } diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs index 98b3bca..62710dd 100644 --- a/Assets/Scripts/UnityAIScripts/UnityLLM.cs +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -1,4 +1,5 @@ using UnityEngine; +using UnityEditor; using LLama; using LLama.Common; using Mono.Cecil.Cil; @@ -17,80 +18,100 @@ class UnityLLM : MonoBehaviour public static UnityLLM Instance { get; private set; } private static string modelPath = @"Assets\StreamingAssets\Models\models--unsloth--Llama-3.2-1B-Instruct-GGUF\snapshots\b69aef112e9f895e6f98d7ae0949f72ff09aa401\Llama-3.2-1B-Instruct-Q4_K_M.gguf"; - private static ModelParams parameters = new ModelParams(modelPath) + public static ModelParams parameters = new ModelParams(modelPath) { ContextSize = 1024, // The longest length of chat as memory. GpuLayerCount = 5 // How many layers to offload to GPU. Please adjust it according to your GPU memory. }; - private static LLamaWeights model = LLamaWeights.LoadFromFile(parameters); - - private static LLamaContext context = model.CreateContext(parameters); - - private InteractiveExecutor executor = new InteractiveExecutor(context); - - private ChatHistory chatHistory = new ChatHistory(); - - private InferenceParams inferenceParams = new InferenceParams() - { - MaxTokens = 256, // No more than 256 tokens should appear in answer. Remove it if antiprompt is enough for control. - AntiPrompts = new List { "User:" } // Stop generation once antiprompts appear. - }; - private async Task Awake() + public static LLamaWeights model = LLamaWeights.LoadFromFile(parameters); + private async void Awake() { Instance = this; - //Load the model - chatHistory.AddMessage(AuthorRole.System, "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."); - chatHistory.AddMessage(AuthorRole.User, "Hello, Bob."); - chatHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?"); - - ChatSession session = new(executor, chatHistory); - string resp = string.Empty; - await foreach ( - string text - in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Can you write a poem about Unity?"), inferenceParams) - ) + if (constData._tcp) { - resp += text; - } - - UnityEngine.Debug.Log("Response from UnityLLM----------------: " + resp); - } - - // Add UnityLLM specific methods and properties here - static LLamaContext freshContext = model.CreateContext(parameters); - InteractiveExecutor freshExec = new InteractiveExecutor(freshContext); - public async Task talk2LLM(string user) - { - ChatHistory cH = new ChatHistory(); - - cH.AddMessage(AuthorRole.System, "Give yourself a random personality and roleplay them"); - - - ChatSession session = new(freshExec, cH); - - string resp = string.Empty; - - if (user.Length > 0){ - await foreach( +#pragma warning disable CS0162 + // Legacy: startup test conversation for validating the TCP/server path + var testContext = model.CreateContext(parameters); + var testExec = new InteractiveExecutor(testContext); + var testHistory = new ChatHistory(); + var testParams = new InferenceParams { MaxTokens = 256, AntiPrompts = new List { "User:" } }; + + testHistory.AddMessage(AuthorRole.System, "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."); + testHistory.AddMessage(AuthorRole.User, "Hello, Bob."); + testHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?"); + + ChatSession session = new(testExec, testHistory); + string resp = string.Empty; + await foreach ( string text - in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, user), inferenceParams) + in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Can you write a poem about Unity?"), testParams) ) { resp += text; } + UnityEngine.Debug.Log("Response from UnityLLM----------------: " + resp); +#pragma warning restore CS0162 } else { - await foreach( - string text - in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Give yourself a random personality and roleplay them"), inferenceParams) - ) + UnityEngine.Debug.Log("UnityLLM: per-NPC context mode (llama.cpp). Shared model loaded."); + } + } + + // Legacy: single shared context — TCP mode only + public async Task talk2LLM(string user) + { + if (constData._tcp) + { +#pragma warning disable CS0162 + var freshContext = model.CreateContext(parameters); + var freshExec = new InteractiveExecutor(freshContext); + var cH = new ChatHistory(); + var legacyParams = new InferenceParams { MaxTokens = 256, AntiPrompts = new List { "User:" } }; + cH.AddMessage(AuthorRole.System, "Give yourself a random personality and roleplay them"); + + ChatSession session = new(freshExec, cH); + string prompt = user.Length > 0 ? user : "Give yourself a random personality and roleplay them"; + string resp = string.Empty; + await foreach (string text in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, prompt), legacyParams)) { resp += text; } + return resp; +#pragma warning restore CS0162 + } + return string.Empty; + } + + // Per-NPC context factory — call once per NPC on Start() + public static NPCContext CreateNPCContext(GUID npcId, string systemPrompt) + { + var npcLlamaContext = model.CreateContext(parameters); + var executor = new InteractiveExecutor(npcLlamaContext); + var history = new ChatHistory(); + history.AddMessage(AuthorRole.System, systemPrompt); + return new NPCContext( + npcId, + history, + executor, + new InferenceParams { MaxTokens = 256, AntiPrompts = new List { "User:" } }, + systemPrompt + ); + } + + // Per-NPC inference — uses the NPC's own context so histories never bleed + public async Task talk2LLMWithContext(NPCContext_intf ctx, string user) + { + ChatSession session = new(ctx.Executor, ctx.History); + string prompt = user.Length > 0 ? user : "Hello"; + string resp = string.Empty; + await foreach (string text in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, prompt), ctx.InferenceParams)) + { + resp += text; } + ctx.LastAccessed = DateTime.Now; return resp; } } \ No newline at end of file From 18b3f9b435297982ec09b200952bb9a78e087277 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 12:25:03 -0500 Subject: [PATCH 09/13] Update documentation --- Assets/Scripts/NPC/README.md | 36 +++++------ Assets/Scripts/README.md | 25 +++++--- Assets/Scripts/ServerFiles/README.md | 4 +- Assets/Scripts/UnityAIScripts/README.md | 39 ++++++------ .../UnityAIScripts/UnityAIScripts_Logging.md | 57 ++++++++++++++++- LLM.md | 61 ++++++++++++++++++- README.md | 17 ++++-- ReadmeTodo.md | 24 ++++++++ 8 files changed, 209 insertions(+), 54 deletions(-) diff --git a/Assets/Scripts/NPC/README.md b/Assets/Scripts/NPC/README.md index 6384e2f..08fbd3e 100644 --- a/Assets/Scripts/NPC/README.md +++ b/Assets/Scripts/NPC/README.md @@ -4,7 +4,7 @@ This directory contains the comprehensive NPC system that powers both traditiona ## Architecture Overview -The NPC system implements a modular character architecture where NPCs can operate in two modes: traditional scripted behavior or AI-enhanced dynamic personalities. AI NPCs establish individual network connections to a Python LLM server, enabling unique personality-driven conversations. +The NPC system implements a modular character architecture where NPCs can operate in two modes: traditional scripted behavior or AI-enhanced dynamic personalities. AI NPCs register a per-NPC `LLamaContext` with `UnityLLMContextHasher` on startup (llama.cpp path), or establish individual TCP connections to the Python LLM server (legacy TCP path). The active path is controlled by `constData._tcp`. ## Core Components @@ -22,9 +22,10 @@ The NPC system implements a modular character architecture where NPCs can operat - Collision-aware interaction validation through `InteractManager` components - **AI Integration**: - Automatic AI detection via Unity tags (`NPC_AI`) - - Async TCP connection establishment for AI communication + - `npcPersonality` field captures personality string from `LLM_NPCController.generatePersonality()` + - **llama.cpp path** (`_tcp = false`): calls `UnityLLM.CreateNPCContext(npcID, npcPersonality)` and registers with `UnityLLMContextHasher` on `Start()`; calls `ctx.Close()` on `OnDestroy()` + - **TCP path** (`_tcp = true`): async TCP connection establishment via `ServerSocketC`; connection stored in `Hasher` by GUID - Dynamic dialog generation through `LLM_NPCController` - - Connection lifecycle management with proper cleanup - **Movement Patterns**: - Configurable waypoint-based walking patterns - Timer-controlled movement intervals for natural behavior @@ -35,19 +36,14 @@ The NPC system implements a modular character architecture where NPCs can operat **AI integration controller** managing LLM communication and response generation. - **Purpose**: Singleton service coordinating AI personality and conversation generation - **Technical Details**: - - Conversation context formatting for LLM prompting - - Async communication with Python LLM server + - Dispatches on `constData._tcp` to select inference path + - Async communication with correct backend - Error handling and connection validation - Dialog history management for context-aware responses - **Conversation Management**: - - Context reformation: combines user input with conversation history - - Role-based dialog formatting (Player/NPC turn tracking) + - **llama.cpp path**: retrieves `NPCContext_intf` from `UnityLLMContextHasher` by NPC GUID; calls `UnityLLM.Instance.talk2LLMWithContext()` — history is natively maintained in the NPC's `ChatSession` + - **TCP path**: formats request as `Invoke:::prompt:::Context:::history` via `reformatDialog()`; sends via `ServerSocketC.NPCRequest()` using the NPC's `TcpClient` from `Hasher` - Personality generation for unique NPC characteristics - - Integration with socket communication layer -- **Protocol Design**: - - Structured prompting with context and invocation separation - - Error propagation for network communication failures - - Connection state validation before requests ### Interactable_intf.cs **Interaction interface** defining the contract for interactive game objects. @@ -77,12 +73,16 @@ The NPC system implements a modular character architecture where NPCs can operat ## Technical Implementation ### AI NPC Lifecycle -1. **Initialization**: GUID generation and component setup -2. **AI Detection**: Tag-based AI capability detection -3. **Connection Establishment**: Async TCP connection to Python server -4. **Personality Generation**: LLM-based character personality creation -5. **Conversation Management**: Context-aware dialog generation -6. **Cleanup**: Connection termination on object destruction +1. **Initialization**: GUID generation and component setup in `Awake()` +2. **AI Detection**: Tag-based AI capability detection (`NPC_AI`) +3. **Personality Generation**: `LLM_NPCController.generatePersonality()` called; result stored in `npcPersonality` and used as system prompt +4. **Context Registration**: + - `_tcp = false`: `UnityLLM.CreateNPCContext(npcID, npcPersonality)` → `UnityLLMContextHasher.HashNPC()` + - `_tcp = true`: async TCP connection → `Hasher.HashNPC()` +5. **Conversation Management**: Context-aware dialog generation via `LLM_NPCController.getDialog()` +6. **Cleanup**: + - `_tcp = false`: `ctx.Close()` in `OnDestroy()` + - `_tcp = true`: `stopRetrying = true` in `OnDestroy()` ### Interaction System - **Proximity Detection**: InteractManager components detect player presence diff --git a/Assets/Scripts/README.md b/Assets/Scripts/README.md index 70df333..731e7b8 100644 --- a/Assets/Scripts/README.md +++ b/Assets/Scripts/README.md @@ -14,9 +14,15 @@ The game operates on a client-server architecture where Unity (C#) handles game - Delegates update calls to appropriate controllers based on current state - Handles transitions between free exploration and conversation modes - Integrates with DialogManager for seamless UI state management -- **Required Component**: Must have AuthManager component attached for IPC authentication +- **Required Component**: Must have AuthManager component attached for IPC authentication (TCP path only) - Coordinates with AuthManager for secure AI server communication +### constData.cs +**Compile-time feature flags** controlling which AI backend is active. +- `_tcp` (`const bool`, default `false`): `false` = llama.cpp in-process via LLamaSharp; `true` = legacy Python TCP server +- Because the value is a `const`, the compiler dead-code-eliminates the inactive branch — zero runtime overhead +- **Rename note**: Previously named `USING_TCP`; renamed to `_tcp` for consistency + ### GameLayers.cs **Unity layer management system** providing centralized access to collision layers. - Singleton pattern for global layer access @@ -25,12 +31,12 @@ The game operates on a client-server architecture where Unity (C#) handles game - Critical for movement validation and interaction detection ### Hasher.cs -**Connection management system** for AI NPC network connections. +**TCP connection management system** for AI NPC network connections (legacy `_tcp` path). - Maintains hash table mapping NPC GUIDs to TCP connections (`Dictionary`) +- Only used when `constData._tcp = true`; the llama.cpp path uses `UnityLLMContextHasher` instead - Singleton pattern for global connection access - Handles connection lifecycle management and cleanup - Provides connection validation and retrieval methods -- Essential for multi-NPC AI communication architecture ### ConnectionInfo.cs **Network connection wrapper** encapsulating TCP client and stream management. @@ -86,12 +92,13 @@ The game operates on a client-server architecture where Unity (C#) handles game - Physics-based game mechanics ### `/ServerFiles` -**Network communication layer** for Unity-Python integration with mandatory IPC authentication. +**Network communication layer** for Unity-Python integration with mandatory IPC authentication. Used only when `constData._tcp = true`. - Socket client implementation for AI communication with token-based authentication - Connection management and request handling with authentication handshakes - Protocol definition for AI service communication with session validation - **Authentication Required**: All connections must authenticate via AuthManager IPC system - Supports secure multi-NPC concurrent connections with individual session tokens +- **Legacy path**: Primary inference now handled by `/UnityAIScripts` via LLamaSharp ### `/ServerFiles-API` **Extended API communication** for additional server functionality. @@ -99,16 +106,20 @@ The game operates on a client-server architecture where Unity (C#) handles game - Extended server communication protocols - Additional network service integrations +### `/UnityAIScripts` +**In-process LLM integration** using LLamaSharp (llama.cpp). Active when `constData._tcp = false` (default). +- `UnityLLM`: singleton model manager; exposes `CreateNPCContext()` factory and `talk2LLMWithContext()` inference +- `UnityLLMContextHasher`: GUID-keyed dictionary of per-NPC `LLamaContext` instances +- `NPCContext` / `NPCContext_intf`: plain C# data container for per-NPC conversation state + ### `/UnityEngineHelper` **Unity Editor integration utilities** for development workflow enhancement. - Domain reload management and resource cleanup - Editor-specific development tools and utilities - Development-time workflow support and debugging tools -- **DomainReloadHelper**: Prevents editor hanging during assembly reloads by managing IPC and network resource cleanup +- **DomainReloadHelper**: Prevents editor hanging during assembly reloads by managing IPC and network resource cleanup; TCP cleanup only runs when `constData._tcp = true` ## Technical Design Patterns - -### Singleton Pattern Multiple systems use singleton pattern for global access: - `GameLayers` for collision layer management - `Hasher` for connection management diff --git a/Assets/Scripts/ServerFiles/README.md b/Assets/Scripts/ServerFiles/README.md index 42a9988..f4443c6 100644 --- a/Assets/Scripts/ServerFiles/README.md +++ b/Assets/Scripts/ServerFiles/README.md @@ -1,4 +1,6 @@ -# ServerFiles - Network Communication Layer +# ServerFiles - Network Communication Layer (Legacy TCP Path) + +> **Note**: This system is the legacy AI communication path. It is active only when `constData._tcp = true`. The primary inference path is now in `/UnityAIScripts` via LLamaSharp (llama.cpp in-process). Set `constData._tcp = false` (default) to use the llama.cpp path without starting any Python process. This directory contains the core network communication systems that enable Unity-Python integration for AI-powered NPC conversations. The system implements TCP socket communication with connection pooling and async request handling for seamless LLM integration. diff --git a/Assets/Scripts/UnityAIScripts/README.md b/Assets/Scripts/UnityAIScripts/README.md index 1774a6f..c587d23 100644 --- a/Assets/Scripts/UnityAIScripts/README.md +++ b/Assets/Scripts/UnityAIScripts/README.md @@ -16,27 +16,29 @@ This design minimizes memory overhead while maintaining independent conversation ### UnityLLM.cs **Singleton model manager and single point of truth for LLM resources** -- **Purpose**: Loads and manages the shared LLamaSharp model instance for all AI NPCs +- **Purpose**: Loads and manages the shared LLamaSharp model instance for all AI NPCs; exposes factory and per-NPC inference methods - **Model Configuration**: - Model path: `Llama-3.2-1B-Instruct-Q4_K_M.gguf` (quantized 4-bit model) - Context size: 1024 tokens for conversation memory - GPU acceleration: 5 layers offloaded to GPU (configurable based on VRAM) - Model format: GGUF format from Unsloth optimized for inference - **Technical Details**: - - Static model instance (`LLamaWeights`) loaded once at initialization + - `model` and `parameters` are **public static** — shared across all NPCs and accessible by the factory + - No class-level context, executor, or chatHistory fields — all state is per-NPC - Singleton pattern for global LLM service access - - Async initialization in `Awake()` for non-blocking model loading - - Default context creation for testing/demonstration purposes + - `Awake()` is `async void` (Unity-compatible) - **Initialization Process**: - Model file loaded from StreamingAssets at startup - - Model parameters configured (context size, GPU layers) - - Test conversation executed to validate model functionality - - Instance reference stored for global access + - If `constData._tcp = true`: runs a legacy test conversation (Bob prompt) for TCP path validation + - If `constData._tcp = false`: logs that per-NPC context mode is active +- **Public API**: + - `CreateNPCContext(GUID npcId, string systemPrompt)` — **static factory**: creates a fresh `LLamaContext`, `InteractiveExecutor`, and `ChatHistory` seeded with `systemPrompt`; returns `NPCContext` + - `talk2LLMWithContext(NPCContext_intf ctx, string user)` — **per-NPC inference**: builds `ChatSession` from NPC's own executor and history, streams response, updates `LastAccessed` + - `talk2LLM(string user)` — **legacy, `_tcp` path only**: creates a fresh shared context per call; returns `string.Empty` when `_tcp = false` - **Memory Management**: - Single model instance reduces RAM usage (vs per-NPC models) - Model remains loaded for application lifetime - - Context creation on-demand for each NPC - - Shared model weights across all inference operations + - Each NPC gets its own `LLamaContext` via factory — no shared state between NPCs ### UnityLLMContextHasher.cs **Context lifecycle manager with GUID-based NPC context hashing** @@ -105,24 +107,23 @@ This design minimizes memory overhead while maintaining independent conversation - Timestamp tracking enables LRU cache eviction strategies - Null assignment prevents dangling references to heavy objects -## Technical Implementation +### Technical Implementation ### Model Loading and Initialization The system loads the LLM model once during application startup: 1. **Path Resolution**: Model file located in StreamingAssets with full snapshot path 2. **Parameter Configuration**: Context size and GPU layer allocation specified 3. **Model Loading**: `LLamaWeights.LoadFromFile()` loads quantized GGUF model into memory -4. **Context Creation**: Default context created from model for testing -5. **Validation**: Test conversation executed to ensure model functionality +4. **Validation** (TCP path only): Test conversation executed if `constData._tcp = true` ### Context Creation Workflow -When a new AI NPC needs LLM capabilities: -1. **Context Initialization**: Create `NPCContext` with NPC-specific configuration -2. **Executor Assignment**: `InteractiveExecutor` created from shared model context -3. **History Setup**: `ChatHistory` initialized with system prompt for personality -4. **Parameter Configuration**: `InferenceParams` set with token limits and stop sequences -5. **Context Registration**: Context hashed in `UnityLLMContextHasher` by NPC GUID -6. **Retrieval**: NPC controller retrieves context via GUID for conversation execution +When a new AI NPC starts (`NPCController.Start()`): +1. `NPCController` calls `UnityLLM.CreateNPCContext(npcID, personalityPrompt)` +2. Factory creates fresh `LLamaContext` from shared `model`, new `InteractiveExecutor`, and `ChatHistory` seeded with system prompt +3. `NPCContext` returned and registered in `UnityLLMContextHasher` keyed by NPC GUID +4. On player message, `LLM_NPCController.getDialog()` retrieves context by GUID and calls `talk2LLMWithContext()` +5. `ChatSession` is created from the NPC's own executor + history — responses stay fully isolated +6. On NPC destroy, `NPCController.OnDestroy()` calls `ctx.Close()` to release the `LLamaContext` ### Context Switching and Management The system supports multiple concurrent NPC conversations: diff --git a/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md b/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md index a745c26..152bfa7 100644 --- a/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md +++ b/Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md @@ -11,7 +11,62 @@ This file is used by agentic models to log analysis, observations, and insights --- - +## 2026-03-15 - GitHub Copilot (Claude Sonnet 4.6) - Per-NPC Context Implementation + +### Component: UnityLLM.cs +**Observation**: The two recommendations from the 2026-01-18 Model Sharing Architecture entry have been fully implemented. + +**Changes Made**: +- `model` and `parameters` changed from `private static` to `public static` — accessible by factory and by NPC registration code +- Removed shared class-level `context`, `executor`, `chatHistory`, `inferenceParams` fields — no class-level inference state remains +- Removed static `freshContext` / `freshExec` fields — these were the root cause of shared context bleed between NPCs +- `Awake()` changed from `async Task` to `async void` (Unity-compatible lifecycle) +- Legacy test conversation in `Awake()` wrapped in `#pragma warning disable CS0162` + `if (constData._tcp)` guard +- Added `CreateNPCContext(GUID npcId, string systemPrompt)` static factory +- Added `talk2LLMWithContext(NPCContext_intf ctx, string user)` per-NPC inference method +- `talk2LLM(string user)` body wrapped in `if (constData._tcp)` guard; returns `string.Empty` on llama.cpp path +- Added `using UnityEditor;` for `GUID` type resolution + +**Impact**: Each AI NPC now has a fully isolated `LLamaContext` + `InteractiveExecutor` + `ChatHistory`. Conversation histories cannot bleed between characters. The shared model weights (`LLamaWeights`) remain loaded once for the application lifetime. + +--- + +### Component: NPCController.cs +**Observation**: NPC registration with `UnityLLMContextHasher` wired up alongside existing TCP path. + +**Changes Made**: +- Added `npcPersonality` field to capture personality string from `dialogBecomesContext()` +- `dialogBecomesContext()` now stores the personality string in `npcPersonality` before passing to `Dialog` +- `Start()` `else` branch (when `_tcp` is `false`): calls `UnityLLM.CreateNPCContext(npcID, npcPersonality)` and `UnityLLMContextHasher.Instance.HashNPC(npcID, ctx)` +- `OnDestroy()` branched: TCP path stops retrying, llama.cpp path calls `UnityLLMContextHasher.Instance.getNPCContext(npcID)?.Close()` +- TCP `establishAndStoreConnection()` call wrapped with `#pragma warning disable CS0162` to suppress dead-code warning + +**Impact**: Every `NPC_AI`-tagged NPC registers its own context on startup and cleans it up on destroy. + +--- + +### Component: LLM_NPCController.cs +**Observation**: `getDialog()` now dispatches correctly on `constData._tcp`. + +**Changes Made**: +- TCP branch: restored previously-commented-out `ServerSocketC.Instance.NPCRequest()` call via `Hasher.getNPCConnection(npcID)` + `reformatDialog()` +- llama.cpp branch: retrieves `NPCContext_intf` from `UnityLLMContextHasher` by GUID, calls `UnityLLM.Instance.talk2LLMWithContext(ctx, userSpeech[^1])` +- `reformatDialog()` is only invoked in the TCP branch (it produces the `Invoke:::` wire format not needed by llama.cpp's native `ChatSession`) + +**Impact**: Both paths compile and work. Switching `constData._tcp` is the only change needed to toggle between them. + +--- + +### Component: constData.cs +**Observation**: `USING_TCP` renamed to `_tcp` for cleaner namespacing across the codebase. + +**Changes Made**: +- `public const bool USING_TCP = false;` → `public const bool _tcp = false;` +- All 5 call sites updated: `ServerSocketC.cs`, `NPCController.cs`, `AuthManager.cs`, `DomainReloadHelper.cs`, `Killports.cs` + +**Impact**: Consistent naming; the `const` nature means the compiler eliminates inactive branches at compile time with zero runtime cost. + +--- ## 2026-01-18 - Initial System Analysis diff --git a/LLM.md b/LLM.md index fd7ecbe..8e2428b 100644 --- a/LLM.md +++ b/LLM.md @@ -1,3 +1,60 @@ -For bundling the model use `https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF?show_file_info=Llama-3.2-1B-Instruct-Q4_K_M.gguf&library=llama-cpp-python` and install it using llama-cpp-python library. +# LLM Setup -Add this model from `C:\Users\\.cache\huggingface\hub`and add it to Assets/StreamingAssets/Models \ No newline at end of file +## Model + +Download `Llama-3.2-1B-Instruct-Q4_K_M.gguf` from HuggingFace (Unsloth): +`https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF?show_file_info=Llama-3.2-1B-Instruct-Q4_K_M.gguf&library=llama-cpp-python` + +After download the file will be at: +`C:\Users\\.cache\huggingface\hub\models--unsloth--Llama-3.2-1B-Instruct-GGUF\snapshots\\` + +Copy/move it into: +`Assets/StreamingAssets/Models/models--unsloth--Llama-3.2-1B-Instruct-GGUF/snapshots//` + +The exact path is configured in `UnityLLM.cs` → `modelPath`. + +--- + +## Runtime Path (`constData._tcp = false`) — Default + +Inference runs **in-process** inside Unity via the **LLamaSharp** plugin (llama.cpp bindings). No Python process is started. + +### How per-NPC context works +1. `UnityLLM` loads the shared `LLamaWeights` once on startup (`public static model`). +2. Each AI NPC calls `UnityLLM.CreateNPCContext(npcID, systemPrompt)` in `Start()`, which creates a fresh `LLamaContext` + `InteractiveExecutor` + `ChatHistory` seeded with the NPC's personality. +3. The context is registered in `UnityLLMContextHasher` keyed by the NPC's Unity GUID. +4. On each player message, `LLM_NPCController.getDialog()` retrieves the NPC's context and calls `UnityLLM.talk2LLMWithContext()`, so conversation history is fully isolated per character. +5. On NPC destroy, `NPCController.OnDestroy()` calls `ctx.Close()` to free the `LLamaContext`. + +--- + +## Legacy Path (`constData._tcp = true`) + +Uses the Python TCP server (`ServerSocketPython.py`) with Ollama + LLaMA 3.2 via `langchain-ollama`. + +### Python requirements +``` +ollama +langchain +langchain-ollama +torch +``` + +Run `pip install -r requirements.txt` inside `Assets/Scripts/ServerFiles/`. + +The Unity side spawns the Python process automatically on play. Authentication uses Windows Named Pipes (IPC) + HMAC-SHA256 request tokens. + +--- + +## `constData._tcp` Flag + +Location: `Assets/Scripts/constData.cs` + +```csharp +public class constData +{ + public const bool _tcp = false; // false = llama.cpp in-process | true = Python TCP server +} +``` + +Because `_tcp` is a **compile-time constant**, the compiler dead-code-eliminates the inactive branch with zero runtime overhead. Change the value and recompile to switch paths. \ No newline at end of file diff --git a/README.md b/README.md index a173829..36d6aa2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # LLM-Powered Game Development -A Unity-based game that leverages a local LLM (e.g., LLAMA) for NPC decision-making and dialogue, using a TCP communication layer between Unity (C#) and Python. +A Unity-based game that leverages a local LLM (LLaMA 3.2 via llama.cpp / LLamaSharp) for NPC decision-making and dialogue. The primary inference path runs fully in-process via the LLamaSharp Unity plugin (no Python server required). A legacy TCP communication layer (Unity C# ↔ Python) is preserved behind the `constData._tcp` compile-time flag for reference and fallback. ## Important NOTE: If the compile time/reloading domain time/enter play mode time are too long, then the issue is in your script, not in Unity. If certain scripts need to act in a certain way but are also causing these issues, the DomanReloadHelper.cs is an example of how you could build helper files for Unity. @@ -21,8 +21,9 @@ If the compile time/reloading domain time/enter play mode time are too long, the ## Features -- **Persistent TCP Connections with IPC handshake**: Reuse the same client connections for multiple requests. -- **LLM-Driven NPC Dialogue**: NPC conversations are generated by the LLM. +- **In-Process LLM Inference (llama.cpp / LLamaSharp)**: Model runs directly inside Unity — no Python server or network hop required. +- **Legacy TCP + IPC Authentication (preserved)**: Persistent TCP connections with Windows Named Pipe handshake and HMAC-SHA256 token auth — kept fully functional behind `_tcp = true`. +- **LLM-Driven NPC Dialogue**: NPC conversations are generated dynamically by the LLM with per-NPC personality system prompts. - **Custom Game Mechanics**: Collision physics and movement systems built from scratch. - **Dynamic NPC Generation**: Easily add new NPCs via scripting. - **Debug HTTP API (FastAPI)**: Alternative debugging interface (higher overhead). @@ -31,9 +32,13 @@ If the compile time/reloading domain time/enter play mode time are too long, the ## Current Progress -- ✅ Using pipes for in-memory handshake before establishing TCP communication. -- ✅ Reliable connection management with delays and retry logic. -- ✅ TCP server implementation for Python ↔ Unity communication. +- ✅ **llama.cpp in-process inference** via LLamaSharp — no Python dependency at runtime. +- ✅ **Per-NPC independent conversation contexts** — each AI NPC registers its own `NPCContext` on `Start()` via `UnityLLM.CreateNPCContext()`. +- ✅ **`constData._tcp` flag** — single constant toggles entire TCP vs llama.cpp code paths (dead-code-eliminated by the compiler when `false`). +- ✅ **`UnityLLMContextHasher`** — GUID-keyed dictionary provides isolated context retrieval per NPC. +- ✅ Using pipes for in-memory handshake before establishing TCP communication (legacy path). +- ✅ Reliable connection management with delays and retry logic (legacy path). +- ✅ TCP server implementation for Python ↔ Unity communication (legacy path). - ✅ NPC dialog integration with the LLM. - ✅ Ports cleanly closed on application shutdown. - ✅ Dynamic component injection for NPC prefabs. diff --git a/ReadmeTodo.md b/ReadmeTodo.md index 868fdc9..8a15a2e 100644 --- a/ReadmeTodo.md +++ b/ReadmeTodo.md @@ -37,6 +37,30 @@ How these changes will improve documentation quality and user understanding. +## 2026-03-15 - GitHub Copilot (Claude Sonnet 4.6) - llama.cpp migration + per-NPC context + +**README Files Modified**: +- `README.md` (root) +- `LLM.md` +- `Assets/Scripts/README.md` +- `Assets/Scripts/NPC/README.md` +- `Assets/Scripts/ServerFiles/README.md` +- `Assets/Scripts/UnityAIScripts/README.md` +- `Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md` + +**Modification Type**: Enhancement / Correction + +**Reason**: Migration from Python TCP server to llama.cpp in-process inference (LLamaSharp). Per-NPC independent context system implemented and wired. `constData.USING_TCP` renamed to `constData._tcp`. All documentation updated to reflect active architecture. + +**Changes**: +1. `README.md`: Updated title, features list, and current progress to reflect llama.cpp as primary path +2. `LLM.md`: Full rewrite — model setup, llama.cpp context flow (step-by-step), legacy TCP path instructions, `_tcp` flag explanation +3. `Assets/Scripts/README.md`: Added `constData.cs` entry documenting `_tcp` flag; updated `Hasher.cs` to note TCP-only scope; added `/UnityAIScripts` directory entry; updated `/ServerFiles` and `DomainReloadHelper` notes +4. `Assets/Scripts/NPC/README.md`: Updated architecture overview, `NPCController` AI integration section, `LLM_NPCController` dispatch description, and AI NPC lifecycle steps for both paths +5. `Assets/Scripts/ServerFiles/README.md`: Added legacy-path notice at top +6. `Assets/Scripts/UnityAIScripts/README.md`: Updated `UnityLLM.cs` section to document `public static model/parameters`, factory, `talk2LLMWithContext`, legacy gate; updated context creation workflow +7. `Assets/Scripts/UnityAIScripts/UnityAIScripts_Logging.md`: Added full implementation log entry for all changed components + ----------------- ## Guidelines for Contributors From 9694b4cfae34d15bea842f8531b45df9aaf1a5d3 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 12:28:55 -0500 Subject: [PATCH 10/13] Updated NPC logging --- Assets/Scripts/NPC/NPC_Logging.md | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/Assets/Scripts/NPC/NPC_Logging.md b/Assets/Scripts/NPC/NPC_Logging.md index b3489ee..2a5a5ea 100644 --- a/Assets/Scripts/NPC/NPC_Logging.md +++ b/Assets/Scripts/NPC/NPC_Logging.md @@ -27,3 +27,47 @@ This file is used by agentic models to log analysis, observations, and insights --- + +## 2026-03-15 - GitHub Copilot (Claude Sonnet 4.6) - Per-NPC llama.cpp Context Wiring + +### Component: NPCController.cs +**Observation**: `NPCController` was updated to support both the llama.cpp in-process path and the legacy TCP path, gated by `constData._tcp`. + +**Changes Made**: +- Added `npcPersonality` field (`private string`) to capture the personality string generated by `LLM_NPCController.generatePersonality()` so it can be passed to the context factory +- `dialogBecomesContext()` now stores the personality string in `npcPersonality` before passing it to `dialog.initFirst()` — previously the string was discarded immediately +- `Start()` now branches on `constData._tcp`: + - `_tcp = true` (legacy): `establishAndStoreConnection()` fires (fire-and-forget via discard `_ =`), registers `TcpClient` with `Hasher` + - `_tcp = false` (default/llama.cpp): calls `UnityLLM.CreateNPCContext(npcID, npcPersonality)` and registers the resulting `NPCContext` with `UnityLLMContextHasher.Instance.HashNPC()` +- `OnDestroy()` now branches on `constData._tcp`: + - `_tcp = true`: sets `stopRetrying = true` + - `_tcp = false`: calls `UnityLLMContextHasher.Instance.getNPCContext(npcID)?.Close()` to release the `LLamaContext` +- Both deprecated branches wrapped with `#pragma warning disable CS0162` to suppress compiler dead-code warnings (expected when `_tcp` is a compile-time `const`) + +**Impact**: +- Every `NPC_AI`-tagged NPC now owns and manages its own `LLamaContext` lifecycle +- Personality system prompts are correctly forwarded to the context factory, giving each NPC consistent persona across conversation turns +- Proper cleanup prevents `LLamaContext` objects from leaking after scene unload or NPC destroy + +**Recommendations**: +- If NPCs are dynamically spawned and destroyed frequently, consider monitoring `UnityLLMContextHasher` dictionary size — contexts should be evicted when NPC is destroyed (now handled), but the dictionary entry itself persists until `OnApplicationQuit` + +--- + +### Component: LLM_NPCController.cs +**Observation**: `getDialog()` previously hardcoded the `UnityLLM.talk2LLM()` call with no per-NPC context. Updated to dispatch on `constData._tcp`. + +**Changes Made**: +- `_tcp = true` branch: restored the previously-commented-out TCP path — calls `Hasher.Instance.getNPCConnection(npcID)` to retrieve the NPC's `TcpClient` and `NetworkStream`, formats the request via `reformatDialog()` (produces `Invoke:::prompt:::Context:::history` wire format), sends via `ServerSocketC.Instance.NPCRequest()` +- `_tcp = false` branch: retrieves `NPCContext_intf ctx` from `UnityLLMContextHasher.Instance.getNPCContext(npcID)` by GUID, calls `UnityLLM.Instance.talk2LLMWithContext(ctx, userSpeech[^1])` — history is maintained natively in the NPC's `ChatSession` +- Removed direct call to `UnityLLM.Instance.talk2LLM()` from the main flow (it is now TCP-only and gated) +- `reformatDialog()` is only invoked in the TCP branch; its `Invoke:::` format is not needed for llama.cpp + +**Impact**: +- Conversation history is now fully isolated per NPC on the llama.cpp path — `talk2LLMWithContext` appends to the NPC's own `ChatHistory` on every turn +- TCP path is fully restored and functional for legacy testing +- The TCP debug log `"Still connected to NPC: ..."` now only fires on the TCP branch, eliminating spurious connection-check errors on the llama.cpp path + +**Recommendations**: +- `reformatDialog()` could be moved inside the TCP branch body or marked with a comment clarifying it is TCP-only, to avoid confusion for future contributors + From 97174889b21385b183a8c87deb97c27bb6652762 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 12:32:03 -0500 Subject: [PATCH 11/13] Debug flags to understand the LLM better --- Assets/Scripts/UnityAIScripts/UnityLLM.cs | 2 +- Assets/Scripts/constData.cs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs index 62710dd..6c5267b 100644 --- a/Assets/Scripts/UnityAIScripts/UnityLLM.cs +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -29,7 +29,7 @@ private async void Awake() { Instance = this; - if (constData._tcp) + if (constData._llmDebug) { #pragma warning disable CS0162 // Legacy: startup test conversation for validating the TCP/server path diff --git a/Assets/Scripts/constData.cs b/Assets/Scripts/constData.cs index 00d06c7..d183825 100644 --- a/Assets/Scripts/constData.cs +++ b/Assets/Scripts/constData.cs @@ -3,4 +3,5 @@ public class constData { public const bool _tcp = false; + public const bool _llmDebug = true; } From 3918c4b0afea365a2c292963ae94fdc04a1a07ca Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 13:09:44 -0500 Subject: [PATCH 12/13] Editor version update and chat prompt enhancements --- Assets/Scripts/NPC/LLM_NPCController.cs | 8 +++-- Assets/Scripts/UnityAIScripts/NPCContext.cs | 3 ++ .../Scripts/UnityAIScripts/NPCContext_intf.cs | 1 + Assets/Scripts/UnityAIScripts/UnityLLM.cs | 8 +++-- Packages/manifest.json | 20 ++++++------- Packages/packages-lock.json | 30 +++++++++---------- 6 files changed, 40 insertions(+), 30 deletions(-) diff --git a/Assets/Scripts/NPC/LLM_NPCController.cs b/Assets/Scripts/NPC/LLM_NPCController.cs index 6653892..44e6158 100644 --- a/Assets/Scripts/NPC/LLM_NPCController.cs +++ b/Assets/Scripts/NPC/LLM_NPCController.cs @@ -44,8 +44,12 @@ public async Task getDialog(List userSpeech, GUID npcID){ else { NPCContext_intf ctx = UnityLLMContextHasher.Instance.getNPCContext(npcID); - Debug.Log("Sending to LLM ----------- " + userSpeech[^1]); - dialog = await UnityLLM.Instance.talk2LLMWithContext(ctx, userSpeech[^1]); + // On the very first turn dialog.Lines = [systemPrompt] only. + // Sending the system prompt as a User message confuses the model; + // use a neutral opener so the NPC introduces itself from its personality. + string userMsg = userSpeech.Count == 1 ? "Hello" : userSpeech[^1]; + Debug.Log("Sending to LLM ----------- " + userMsg); + dialog = await UnityLLM.Instance.talk2LLMWithContext(ctx, userMsg); } Debug.Log("Got back from LLM --------- " + dialog); return dialog; diff --git a/Assets/Scripts/UnityAIScripts/NPCContext.cs b/Assets/Scripts/UnityAIScripts/NPCContext.cs index ed84f65..c9367b0 100644 --- a/Assets/Scripts/UnityAIScripts/NPCContext.cs +++ b/Assets/Scripts/UnityAIScripts/NPCContext.cs @@ -9,6 +9,7 @@ public class NPCContext : NPCContext_intf public GUID NpcId { get; set; } public ChatHistory History { get; set; } public InteractiveExecutor Executor { get; set; } + public ChatSession Session { get; set; } public InferenceParams InferenceParams { get; set; } public string SystemPrompt { get; set; } public DateTime LastAccessed { get; set; } @@ -18,6 +19,7 @@ public NPCContext(GUID npcId, ChatHistory history, InteractiveExecutor executor, NpcId = npcId; History = history; Executor = executor; + Session = new ChatSession(executor, history); // created once; reused every turn InferenceParams = inferenceParams; SystemPrompt = systemPrompt; LastAccessed = DateTime.Now; @@ -35,6 +37,7 @@ private void OnDestroy() public void Close() { + Session = null; Executor = null; History = null; Debug.Log("NPCContext closed for NPC ID: " + NpcId); diff --git a/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs b/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs index 41a15d6..f7ed795 100644 --- a/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs +++ b/Assets/Scripts/UnityAIScripts/NPCContext_intf.cs @@ -9,6 +9,7 @@ public interface NPCContext_intf GUID NpcId { get; set; } ChatHistory History { get; set; } InteractiveExecutor Executor { get; set; } + ChatSession Session { get; set; } InferenceParams InferenceParams { get; set; } string SystemPrompt { get; set; } DateTime LastAccessed { get; set; } diff --git a/Assets/Scripts/UnityAIScripts/UnityLLM.cs b/Assets/Scripts/UnityAIScripts/UnityLLM.cs index 6c5267b..cae13a3 100644 --- a/Assets/Scripts/UnityAIScripts/UnityLLM.cs +++ b/Assets/Scripts/UnityAIScripts/UnityLLM.cs @@ -56,7 +56,9 @@ in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, "Can you write a p } else { +#pragma warning disable CS0162 UnityEngine.Debug.Log("UnityLLM: per-NPC context mode (llama.cpp). Shared model loaded."); +#pragma warning restore CS0162 } } @@ -101,13 +103,13 @@ public static NPCContext CreateNPCContext(GUID npcId, string systemPrompt) ); } - // Per-NPC inference — uses the NPC's own context so histories never bleed + // Per-NPC inference — reuses the single ChatSession stored on the context so the + // InteractiveExecutor KV cache is never replayed from scratch on each turn. public async Task talk2LLMWithContext(NPCContext_intf ctx, string user) { - ChatSession session = new(ctx.Executor, ctx.History); string prompt = user.Length > 0 ? user : "Hello"; string resp = string.Empty; - await foreach (string text in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, prompt), ctx.InferenceParams)) + await foreach (string text in ctx.Session.ChatAsync(new ChatHistory.Message(AuthorRole.User, prompt), ctx.InferenceParams)) { resp += text; } diff --git a/Packages/manifest.json b/Packages/manifest.json index 02dd867..653deba 100644 --- a/Packages/manifest.json +++ b/Packages/manifest.json @@ -2,17 +2,17 @@ "dependencies": { "com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask", "com.github-glitchenzo.nugetforunity": "https://github.com/GlitchEnzo/NuGetForUnity.git?path=/src/NuGetForUnity", - "com.unity.collab-proxy": "2.8.2", - "com.unity.feature.2d": "2.0.1", - "com.unity.ide.rider": "3.0.36", - "com.unity.ide.visualstudio": "2.0.23", - "com.unity.inputsystem": "1.14.0", - "com.unity.multiplayer.center": "1.0.0", - "com.unity.render-pipelines.universal": "17.0.4", - "com.unity.test-framework": "1.5.1", - "com.unity.timeline": "1.8.7", + "com.unity.collab-proxy": "2.11.4", + "com.unity.feature.2d": "2.0.2", + "com.unity.ide.rider": "3.0.39", + "com.unity.ide.visualstudio": "2.0.26", + "com.unity.inputsystem": "1.19.0", + "com.unity.multiplayer.center": "1.0.1", + "com.unity.render-pipelines.universal": "17.3.0", + "com.unity.test-framework": "1.6.0", + "com.unity.timeline": "1.8.11", "com.unity.ugui": "2.0.0", - "com.unity.visualscripting": "1.9.7", + "com.unity.visualscripting": "1.9.10", "com.unity.modules.accessibility": "1.0.0", "com.unity.modules.ai": "1.0.0", "com.unity.modules.androidjni": "1.0.0", diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json index 51e129a..2b20aa9 100644 --- a/Packages/packages-lock.json +++ b/Packages/packages-lock.json @@ -15,11 +15,11 @@ "hash": "c2af83c9d4f8cdaada9d4a0e94de2f195d8e1d01" }, "com.unity.2d.animation": { - "version": "13.0.2", + "version": "13.0.4", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "12.0.1", + "com.unity.2d.common": "12.0.2", "com.unity.2d.sprite": "1.0.0", "com.unity.collections": "2.4.3", "com.unity.modules.animation": "1.0.0", @@ -41,7 +41,7 @@ "url": "https://packages.unity.com" }, "com.unity.2d.common": { - "version": "12.0.1", + "version": "12.0.2", "depth": 2, "source": "registry", "dependencies": { @@ -113,17 +113,17 @@ "url": "https://packages.unity.com" }, "com.unity.2d.tooling": { - "version": "1.0.0", + "version": "1.0.2", "depth": 1, "source": "registry", "dependencies": { - "com.unity.2d.common": "12.0.1", + "com.unity.2d.common": "12.0.2", "com.unity.modules.uielements": "1.0.0" }, "url": "https://packages.unity.com" }, "com.unity.burst": { - "version": "1.8.27", + "version": "1.8.28", "depth": 2, "source": "registry", "dependencies": { @@ -133,7 +133,7 @@ "url": "https://packages.unity.com" }, "com.unity.collab-proxy": { - "version": "2.8.2", + "version": "2.11.4", "depth": 0, "source": "registry", "dependencies": {}, @@ -163,7 +163,7 @@ "depth": 0, "source": "builtin", "dependencies": { - "com.unity.2d.animation": "13.0.2", + "com.unity.2d.animation": "13.0.4", "com.unity.2d.pixel-perfect": "5.1.1", "com.unity.2d.psdimporter": "12.0.1", "com.unity.2d.sprite": "1.0.0", @@ -171,11 +171,11 @@ "com.unity.2d.tilemap": "1.0.0", "com.unity.2d.tilemap.extras": "6.0.1", "com.unity.2d.aseprite": "3.0.1", - "com.unity.2d.tooling": "1.0.0" + "com.unity.2d.tooling": "1.0.2" } }, "com.unity.ide.rider": { - "version": "3.0.36", + "version": "3.0.39", "depth": 0, "source": "registry", "dependencies": { @@ -184,16 +184,16 @@ "url": "https://packages.unity.com" }, "com.unity.ide.visualstudio": { - "version": "2.0.23", + "version": "2.0.26", "depth": 0, "source": "registry", "dependencies": { - "com.unity.test-framework": "1.1.9" + "com.unity.test-framework": "1.1.33" }, "url": "https://packages.unity.com" }, "com.unity.inputsystem": { - "version": "1.14.0", + "version": "1.19.0", "depth": 0, "source": "registry", "dependencies": { @@ -292,7 +292,7 @@ "url": "https://packages.unity.com" }, "com.unity.timeline": { - "version": "1.8.7", + "version": "1.8.11", "depth": 0, "source": "registry", "dependencies": { @@ -313,7 +313,7 @@ } }, "com.unity.visualscripting": { - "version": "1.9.7", + "version": "1.9.10", "depth": 0, "source": "registry", "dependencies": { From cfb62eecc8003d246c84a4b47e5a237d1176d713 Mon Sep 17 00:00:00 2001 From: Aditya YV Date: Sun, 15 Mar 2026 13:22:35 -0500 Subject: [PATCH 13/13] Logs --- Assets/Scripts/NPC/NPC_Logging.md | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Assets/Scripts/NPC/NPC_Logging.md b/Assets/Scripts/NPC/NPC_Logging.md index 2a5a5ea..f83ef87 100644 --- a/Assets/Scripts/NPC/NPC_Logging.md +++ b/Assets/Scripts/NPC/NPC_Logging.md @@ -71,3 +71,41 @@ This file is used by agentic models to log analysis, observations, and insights **Recommendations**: - `reformatDialog()` could be moved inside the TCP branch body or marked with a comment clarifying it is TCP-only, to avoid confusion for future contributors +--- + +## 2026-03-15 - GitHub Copilot (Claude Sonnet 4.6) - First-turn prompt bug fix + ChatSession lifetime fix + +### Component: LLM_NPCController.cs +**Observation**: `getDialog()` was sending `userSpeech[^1]` unconditionally on every turn. On the first turn, `dialog.Lines` only contains the system prompt string, so `userSpeech[^1]` resolved to the personality description (e.g. `"You are the first npc in this game who is connected to an LLM"`). This was sent to the model as a *User* message, causing the LLM to respond as if the player had just said that text, producing off-character story-mode output. + +**Impact**: +- First NPC response was completely wrong — model roleplayed the personality description as player input instead of adopting it as its own character +- Subsequent turns appeared to work but were building on a corrupted conversation start + +**Changes Made**: +- Added first-turn detection: `string userMsg = userSpeech.Count == 1 ? "Hello" : userSpeech[^1];` +- On first turn (only the system prompt in Lines), a neutral `"Hello"` is sent so the NPC introduces itself naturally from its system prompt +- All subsequent turns send the actual player text as before + +**Recommendations**: +- If NPCs need a custom opening line instead of a generic greeting, `generatePersonality()` could return a struct with both the system prompt and an optional opening user seed message + +--- + +### Component: NPCContext.cs, NPCContext_intf.cs, UnityLLM.cs (talk2LLMWithContext) +**Observation**: `talk2LLMWithContext()` was calling `new ChatSession(ctx.Executor, ctx.History)` on every invocation. `LLamaSharp`'s `InteractiveExecutor` maintains a live KV cache after inference. Constructing a new `ChatSession` on top of an existing KV cache caused the full `ChatHistory` to be replayed against the already-advanced cache state. The model then immediately reached the `"User:"` anti-prompt mid-replay and returned `"User:"` as the complete response. + +**Impact**: +- Every response after the first returned the literal string `"User:"` or empty string +- Conversation appeared to work (no exceptions thrown) but all NPC replies were silent/broken +- Bug would worsen over time as history grew longer, since each call replayed an ever-larger history over a more advanced cache + +**Changes Made**: +- Added `ChatSession Session { get; set; }` to `NPCContext_intf` interface +- `NPCContext` constructor now creates `Session = new ChatSession(executor, history)` once at context creation time +- `talk2LLMWithContext()` updated to call `ctx.Session.ChatAsync(...)` directly — no `ChatSession` instantiation per call +- `NPCContext.Close()` sets `Session = null` alongside `Executor` and `History` + +**Recommendations**: +- `ChatSession` is stateful and not thread-safe; if concurrent NPC inference is ever needed, each concurrent request would need its own executor/context pair rather than sharing one `NPCContext` +