Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7c8ffe6
Add initial configs for Feb evaluation
dmjoy Dec 18, 2025
7652264
Update TA3 client version; update ICL databases
dmjoy Dec 19, 2025
1dab7b4
Update choose_a and choose_b experiments to point at AWS TA3 server
dmjoy Dec 19, 2025
46815ee
Add spectrum-tuned configs
eveenhuis Dec 24, 2025
0ce2d74
Add observation configs
eveenhuis Dec 31, 2025
a7ca086
Quickfix for direct regression PS scenarios
dmjoy Jan 1, 2026
76c187d
Upgrade TA3 client version dep
dmjoy Jan 1, 2026
21a434d
Multi-attribute run tweaks and experiment configs
dmjoy Jan 7, 2026
ea8060a
Set up caching for ICL step
dmjoy Jan 7, 2026
23073d0
Add live eval configs for multi-target
dmjoy Jan 9, 2026
914a6bc
Add direct reg multi configs with different backbones
dmjoy Jan 13, 2026
5406d38
MF z-score values updated (2026-01-21)
dmjoy Jan 26, 2026
c47e6a7
Add and enable caching for tagging ADMs
dmjoy Feb 11, 2026
b353060
Attempt to get the LLM to provide START tag rather than what it think…
aaron-bray Feb 13, 2026
bbb3027
Removing protocol names from tagging prompts to try get better object…
aaron-bray Feb 16, 2026
9bd2be1
Add icl for each tagging protocol
aaron-bray Feb 16, 2026
7e7753c
Another round of tagging prompt and icl changes for evaluation
aaron-bray Feb 19, 2026
1d6dd93
Changing protocol tags from colors to shapes
aaron-bray Feb 20, 2026
69c5b83
Missed changing some text from color to shape in our tagging prompt
aaron-bray Feb 20, 2026
f2b82af
Add verbiage to tagging prompts for LLM to stop advancing through the…
aaron-bray Feb 21, 2026
d122dc0
More verbiage updates to tagging prompts
aaron-bray Feb 23, 2026
2eefade
More detail added to extremity injuries
aaron-bray Feb 23, 2026
91457a7
An attempt at better control of start tagging
aaron-bray Feb 28, 2026
77e1ef1
Add expected resulting tag to auto generated icl reasons
aaron-bray Mar 3, 2026
02f67b9
Support variable tag symbols
aaron-bray Mar 18, 2026
a6e5369
Default to hand crafted icl
aaron-bray Mar 18, 2026
2463342
Conform to outlines
aaron-bray Mar 18, 2026
1613151
Need to also pass in symbol type for different prompt symbols
aaron-bray Mar 19, 2026
8c1df05
New tagging ICL for shapes
aaron-bray Mar 23, 2026
393f6f4
Remove clinical definition from color, and specific protocol names fr…
aaron-bray Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ __pycache__/
outputs
slurm*.out

.idea/
.vscode/

5 changes: 3 additions & 2 deletions align_system/algorithms/alignment_adm_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,14 +327,15 @@ def run_returns(self):

def _compute_p_choose_a(self, kdma, intercept, medical_weight, attr_weight, raw_medical_delta, raw_attr_score):
# Provided by ADEPT 2025-12-12
# MF updated 2026-01-21
scaling = {
"affiliation": {
"medical": [0.403801, 0.297245],
"attribute": [0.405073, 0.298288],
},
"merit": {
"medical": [0.433409, 0.308294],
"attribute": [0.357632, 0.27947],
"medical": [0.428961, 0.301250],
"attribute": [0.337618, 0.272520],
},
"personal_safety": {
"medical": [0.456221, 0.246484],
Expand Down
71 changes: 68 additions & 3 deletions align_system/algorithms/icl_adm_component.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import re
import inspect
import copy
from functools import lru_cache
from collections.abc import Mapping

import ubelt as ub

from align_system.utils import logging, call_with_coerced_args
from align_system.utils.alignment_utils import attributes_in_alignment_target
from align_system.algorithms.abstracts import ADMComponent
Expand Down Expand Up @@ -37,7 +42,8 @@ def __init__(self,
scenario_description_template,
prompt_template,
attributes=None,
target_attribute_names_override=None):
target_attribute_names_override=None,
enable_caching=False):
self.icl_generator_partial = icl_generator_partial
self.scenario_description_template = scenario_description_template

Expand All @@ -49,6 +55,8 @@ def __init__(self,

self.target_attribute_names_override = target_attribute_names_override

self.enable_caching = enable_caching

def run_returns(self):
return ('icl_dialog_elements', 'icl_example_info')

Expand Down Expand Up @@ -77,6 +85,29 @@ def run(self,

target_attributes = [self.attributes[n] for n in target_attribute_names]

if self.enable_caching:
scenario_state_copy = copy.deepcopy(scenario_state)
if hasattr(scenario_state, 'elapsed_time'):
# Don't consider the elapsed_time of the state when caching
scenario_state_copy.elapsed_time = 0

depends = '\n'.join((
self.cache_repr(),
repr(scenario_state_copy),
repr(choices),
repr(target_attribute_names)))

cacher = ub.Cacher('icl_adm_component', depends, verbose=0)
log.debug(f'cacher.fpath={cacher.fpath}')

cached_output = cacher.tryload()
if cached_output is not None:
log.info("Cache hit for `icl_adm_component`"
" returning cached output")
return cached_output
else:
log.info("Cache miss for `icl_adm_component` ..")

# Mapping covers `dict` and `omegaconf.dictconfig.DictConfig`
if not isinstance(alignment_target, Mapping):
alignment_target_dict = alignment_target.to_dict()
Expand All @@ -85,7 +116,8 @@ def run(self,

alignment_target_value_lookup = {
kdma_values['kdma']: kdma_values['value']
for kdma_values in alignment_target_dict['kdma_values']}
for kdma_values in alignment_target_dict['kdma_values']
if 'value' in kdma_values}

icl_dialog_elements = {}
icl_example_info = {}
Expand Down Expand Up @@ -148,7 +180,40 @@ def run(self,
}
icl_example_info[attribute.kdma].append(icl_info)

return icl_dialog_elements, icl_example_info
outputs = (icl_dialog_elements, icl_example_info)

if self.enable_caching:
cacher.save(outputs)

return outputs

def cache_repr(self):
    '''
    Return a string representation of this object for caching;
    i.e. if the return value of this function is the same for two
    object instances, it's assumed that `run` output will be
    the same if given the same parameters.

    NOTE(review): `enable_caching` itself is not part of the key —
    presumably because it doesn't affect `run` results, only whether
    they're cached; confirm if new behavior-affecting params are added.
    '''

    def _generic_object_repr(obj):
        # Render an object as "module.Class(param=value, ...)" using its
        # __init__ parameter names looked up against the instance's vars.
        # Guard against optional components left as None (e.g. defaults);
        # mirrors the equivalent helper in
        # prompt_based_aligned_adm_component's cache_repr.
        if obj is None:
            return "None"

        init_params = inspect.signature(obj.__class__.__init__).parameters
        obj_vars = vars(obj)

        return "{}.{}({})".format(
            obj.__class__.__module__,
            obj.__class__.__name__,
            ", ".join([f"{p}={obj_vars[p]}" for p in init_params
                       if p != 'self' and p != 'args' and p != 'kwargs']))

    # Strip leading whitespace from every line of the template so the key
    # is stable regardless of source indentation
    return re.sub(r'^\s+', '',
                  f"""
{self.__class__.__module__}.{self.__class__.__name__}(
icl_generator_partial={self.icl_generator_partial},
scenario_description_template={_generic_object_repr(self.scenario_description_template)},
prompt_template={_generic_object_repr(self.prompt_template)},
attributes={self.attributes},
target_attribute_names_override={self.target_attribute_names_override},
)""", flags=re.MULTILINE).strip()


# ICL Engines dependent on alignment target, but that could change
Expand Down
78 changes: 76 additions & 2 deletions align_system/algorithms/prompt_based_aligned_adm_component.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import re
import inspect
import copy

from rich.highlighter import JSONHighlighter
from swagger_client.models import KDMAValue
import ubelt as ub

from align_system.utils import logging, call_with_coerced_args
from align_system.algorithms.abstracts import ADMComponent
Expand All @@ -24,7 +29,8 @@ def __init__(self,
num_negative_samples=0,
vote_calculator_fn=calculate_votes,
filter_votes_to_positives=True,
shuffle_choices=True):
shuffle_choices=True,
enable_caching=False):
self.structured_inference_engine = structured_inference_engine
self.scenario_description_template = scenario_description_template
self.prompt_template = prompt_template
Expand All @@ -40,6 +46,8 @@ def __init__(self,

self.shuffle_choices = shuffle_choices

self.enable_caching = enable_caching

def run_returns(self):
return ('chosen_choice', 'justification', 'dialog')

Expand All @@ -61,6 +69,31 @@ def run(self,
# Assumption here is that KDMA values range from 0-1
negative_value = 1 - value

if self.enable_caching:
scenario_state_copy = copy.deepcopy(scenario_state)
if hasattr(scenario_state, 'elapsed_time'):
# Don't consider the elapsed_time of the state when caching
scenario_state_copy.elapsed_time = 0

depends = '\n'.join((
self.cache_repr(),
repr(scenario_state_copy),
repr(choices),
repr(positive_icl_dialog_elements),
repr(negative_icl_dialog_elements),
repr(kdma_value)))

cacher = ub.Cacher('prompt_based_aligned_adm_component', depends, verbose=0)
log.debug(f'cacher.fpath={cacher.fpath}')

cached_output = cacher.tryload()
if cached_output is not None:
log.info("Cache hit for `prompt_based_aligned_adm_component`"
" returning cached output")
return cached_output
else:
log.info("Cache miss for `prompt_based_aligned_adm_component` ..")

scenario_description = call_with_coerced_args(
self.scenario_description_template,
{'scenario_state': scenario_state})
Expand Down Expand Up @@ -182,4 +215,45 @@ def run(self,
top_choice_justification = response['detailed_reasoning']
break

return top_choice, top_choice_justification, positive_dialog
outputs = (top_choice, top_choice_justification, positive_dialog)

if self.enable_caching:
cacher.save(outputs)

return outputs

def cache_repr(self):
    '''
    Build a deterministic string describing this component's
    configuration, used as (part of) a cache key; i.e. two instances
    producing the same string are assumed to yield identical `run`
    output when given the same parameters.
    '''

    def _generic_object_repr(component):
        # A missing/optional component has no constructor state to render
        if component is None:
            return "None"

        cls = component.__class__
        params = inspect.signature(cls.__init__).parameters
        state = vars(component)

        rendered = ", ".join(
            f"{name}={state[name]}"
            for name in params
            if name not in ('self', 'args', 'kwargs'))

        return "{}.{}({})".format(cls.__module__, cls.__name__, rendered)

    raw = f"""
    {self.__class__.__module__}.{self.__class__.__name__}(
    structured_inference_engine={self.structured_inference_engine.cache_repr()},
    scenario_description_template={_generic_object_repr(self.scenario_description_template)},
    prompt_template={_generic_object_repr(self.prompt_template)},
    output_schema_template={_generic_object_repr(self.output_schema_template)},
    system_prompt_template={_generic_object_repr(self.system_prompt_template)},
    num_positive_samples={self.num_positive_samples},
    num_negative_samples={self.num_negative_samples},
    vote_calculator_fn={_generic_object_repr(self.vote_calculator_fn)},
    filter_votes_to_positives={self.filter_votes_to_positives},
    shuffle_choices={self.shuffle_choices},
    )"""

    # Per-line leading-whitespace strip keeps the key independent of
    # source indentation
    return re.sub(r'^\s+', '', raw, flags=re.MULTILINE).strip()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: phase2_pipeline_zeroshot_comparative_regression_swap_average
name: phase2_pipeline_direct_medical_regression

defaults:
# Import defaults into this namespace (adm) as @name, for further
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: phase2_pipeline_zeroshot_comparative_regression_swap_average
name: phase2_pipeline_direct_regression

defaults:
# Import defaults into this namspace (adm) as @name, for further
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# @package _global_
# Experiment config: baseline pipeline ADM against the TA3 interface
# (ADEPT full training session) using the greedy outlines structured
# inference engine, with caching enabled for the outlines_baseline step.
defaults:
- override /adm: pipeline_baseline
- override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy
- override /interface: ta3

# TA3 server connection and session settings
interface:
api_endpoint: "https://darpaitm.caci.com"
session_type: adept
training_session: full
username: "testrun-pipeline_baseline"
domain: "p2triage"

adm:
# Phase 2 prompt templates for the baseline step
step_definitions:
outlines_baseline:
scenario_description_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2ScenarioDescription
prompt_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt

# Enable on-disk caching of this step's outputs
enable_caching: true

instance:
steps:
# Reference the step instances we want to use in order
- ${ref:adm.step_definitions.format_choices}
- ${ref:adm.step_definitions.outlines_baseline}
# - ${ref:adm.step_definitions.action_parameter_completion}
- ${ref:adm.step_definitions.ensure_chosen_action}
- ${ref:adm.step_definitions.populate_choice_info}

driver:
apply_action_filtering: false

force_determinism: true
align_to_target: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# @package _global_
# Experiment config: baseline pipeline ADM live evaluation (eval session,
# no training) on the TA3 server, with the structured inference engine's
# model overridden to DeepSeek-R1-Distill-Llama-8B.
defaults:
- override /adm: pipeline_baseline
- override /interface: ta3

# TA3 server connection and session settings (live eval username)
interface:
api_endpoint: "https://darpaitm.caci.com"
session_type: eval
training_session: null
username: "ALIGN-ADM-OutlinesBaseline-DeepSeek-R1-Distill-Llama-8B"
domain: "p2triage"

adm:
# Swap the backbone model for this run
structured_inference_engine:
model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B

# Phase 2 prompt templates for the baseline step
step_definitions:
outlines_baseline:
scenario_description_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2ScenarioDescription
prompt_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt

# Enable on-disk caching of this step's outputs
enable_caching: true

instance:
steps:
# Reference the step instances we want to use in order
- ${ref:adm.step_definitions.format_choices}
- ${ref:adm.step_definitions.outlines_baseline}
# - ${ref:adm.step_definitions.action_parameter_completion}
- ${ref:adm.step_definitions.ensure_chosen_action}
- ${ref:adm.step_definitions.populate_choice_info}

driver:
apply_action_filtering: false

force_determinism: true
align_to_target: false
save_last_unstructured_state_per_scenario: true

# Timestamped output directory for live multi-target eval results
hydra:
run:
dir: 'phase2_feb2026_multi_results_live/phase2_baseline_deepseek_llama_live_eval_multi/${now:%Y-%m-%d__%H-%M-%S}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# @package _global_
# Experiment config: test-run variant of the DeepSeek baseline live eval
# ("testrun-" username, results written to a local directory); otherwise
# mirrors the live-eval DeepSeek-R1-Distill-Llama-8B baseline config.
defaults:
- override /adm: pipeline_baseline
- override /interface: ta3

# TA3 server connection and session settings (test-run username)
interface:
api_endpoint: "https://darpaitm.caci.com"
session_type: eval
training_session: null
username: "testrun-pipeline_baseline_deepseek_llama"
domain: "p2triage"

adm:
# Swap the backbone model for this run
structured_inference_engine:
model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B

# Phase 2 prompt templates for the baseline step
step_definitions:
outlines_baseline:
scenario_description_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2ScenarioDescription
prompt_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt

# Enable on-disk caching of this step's outputs
enable_caching: true

instance:
steps:
# Reference the step instances we want to use in order
- ${ref:adm.step_definitions.format_choices}
- ${ref:adm.step_definitions.outlines_baseline}
# - ${ref:adm.step_definitions.action_parameter_completion}
- ${ref:adm.step_definitions.ensure_chosen_action}
- ${ref:adm.step_definitions.populate_choice_info}

driver:
apply_action_filtering: false

force_determinism: true
align_to_target: false
save_last_unstructured_state_per_scenario: true

# Timestamped local output directory for test results
hydra:
run:
dir: 'phase2_feb2026_results_local/phase2_baseline_deepseek_llama_live_eval_test/${now:%Y-%m-%d__%H-%M-%S}'
Loading