Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions rl_code/Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
'recurrent': config['RECURRENT'],
'attention': config['ATTENTION'],
'neighbors': config['NEIGHBORS'],
'broadcast': config.get('BROADCAST', False),
'gsp_input_size':config['GSP_INPUT_SIZE'],
'gsp_output_size':config['GSP_OUTPUT_SIZE'],
'gsp_look_back':config['GSP_LOOK_BACK'],
Expand Down Expand Up @@ -362,6 +363,11 @@
if model.gsp_neighbors:
agent_gsp_states = model.make_gsp_states(agent_prox_flags, old_heading_gsp)
ctde_gsp = model.choose_agent_gsp(agent_gsp_states, test_mode)
elif model.gsp_broadcast:
# GSP-B: per-agent self-centric view with full-broadcast
# [self_prox, self_prev_gsp, other_i_prox, other_i_prev_gsp, ...]
agent_gsp_states = model.make_gsp_states_broadcast(agent_prox_flags, old_heading_gsp)
ctde_gsp = model.choose_agent_gsp(agent_gsp_states, test_mode)
else:
ctde_gsp = model.choose_agent_gsp(agent_prox_flags, test_mode)
for i in range(Utility.params['num_robots']):
Expand All @@ -377,20 +383,27 @@
states, state_prox_flags = model.make_gsp_states(old_agent_prox_flags, neighbors_old_heading_gsp, True)
new_states = model.make_gsp_states(agent_prox_flags, old_heading_gsp)
for i in range(Utility.params['num_robots']):
# print(f'[AGENT] {i} PROX FLAGS:', state_prox_flags[i])
# only store if state has value
if np.sum(state_prox_flags[i]) > 0:
# print(f'[AGENT] {i} Has Value, Storing GSP State: {states[i]}')
if model.gsp_networks['learning_scheme'] == 'attention':
model.store_gsp_transition(states[i], label, 0, 0, 0)
else:
# Under the direct-MSE GSP training path, the 2nd arg
# (action field) carries the supervised target label.
# See GSP-RL fix/gsp-direct-mse-training PR #24 and
# Stelaris docs/research/2026-04-13-gsp-information-collapse-analysis.md.
# 2nd arg = label (supervised target for direct-MSE GSP training)
state = states[i]
new_state = new_states[i]
model.store_gsp_transition(state, label, 0, new_state, 0)
elif model.gsp_broadcast:
# GSP-B per-agent storage with broadcast inputs.
# state_t : broadcast view at previous step (uses neighbors_old_heading_gsp so
# the prev_gsp slot reflects the prediction from the previous tick)
# state_{t+1}: broadcast view at current step
states = model.make_gsp_states_broadcast(old_agent_prox_flags, neighbors_old_heading_gsp)
new_states = model.make_gsp_states_broadcast(agent_prox_flags, old_heading_gsp)
for i in range(Utility.params['num_robots']):
# Gate on self-prox being non-zero so we only store informative transitions,
# matching the GSP and GSP-N branches. Self-prox lives at index 0 under the
# self-first layout.
if states[i][0] != 0:
model.store_gsp_transition(states[i], label, 0, new_states[i], 0)
else:
for i in range(Utility.params['num_robots']):
if model.gsp_networks['learning_scheme'] == 'attention':
Expand Down
64 changes: 60 additions & 4 deletions rl_code/src/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,25 @@ def __init__(
gsp_min_max_action: float,
gsp_look_back: int,
gsp_sequence_length: int,
broadcast: bool = False,
prox_filter_angle_deg: float = 45.0,
n_hop_neighbors: int = 1,
):
if neighbors and broadcast:
raise ValueError(
"GSP variants neighbors=True and broadcast=True are mutually exclusive — "
"they overload gsp_input_size differently. Pick one."
)
if neighbors:
# 2 inputs from ownship (prev_gsp, avg_prox)
# 2 inputs from each neighbor (prev_gsp, avg_prox)
# 2*n_hop_neighbors for symmetry in both CW and CCW
gsp_input_size = 2+2*(n_hop_neighbors*2)
gsp_input_size = 2+2*(n_hop_neighbors*2)
if broadcast:
# GSP-B: each agent's view is (self_prox, self_prev_gsp) + (other_prox, other_prev_gsp)
# for all (n_agents - 1) other agents. Total 2*n_agents. Known limitation:
# coupled to team size, not transferable across num_robots.
gsp_input_size = 2 * n_agents

output_size = n_actions
if network in ['DQN', 'DDQN']:
Expand Down Expand Up @@ -68,13 +79,16 @@ def __init__(
self._network = network
self._n_actions = n_actions
self._neighbors = neighbors
self._broadcast = broadcast
self._n_hop_neighbors = n_hop_neighbors
self.neighbors_dict = {}
self._options_per_action = options_per_action
self._prox_filter_angle_deg = prox_filter_angle_deg


if self._neighbors:
if self._neighbors or self._broadcast:
# Per-agent observation ring buffers: GSP-N and GSP-B both produce
# per-agent self-centric views, so each agent has its own history.
self.gsp_observation = []
for _ in range(self._n_agents):
self.gsp_observation.append([[0 for _ in range(self.gsp_network_input)] for _ in range(self.gsp_sequence_length)])
Expand All @@ -98,6 +112,10 @@ def __init__(
def gsp_neighbors(self):
return self._neighbors

@property
def gsp_broadcast(self):
    """Return the ``broadcast`` flag stored on this agent (GSP-B switch)."""
    return self._broadcast

@property
def n_agents(self):
    """Return the number of agents this object was configured with."""
    return self._n_agents
Expand Down Expand Up @@ -155,6 +173,40 @@ def make_agent_state(self, env_obs, heading_gsp=None, global_knowledge=None):
env_obs = np.concatenate((env_obs, global_knowledge))
return env_obs

def make_gsp_states_broadcast(self, agent_prox_values, agent_prev_gsp):
    """Build one self-first GSP-B (full-broadcast) input vector per agent.

    For agent ``a`` the vector is laid out as ``(prox, prev_gsp)`` pairs:
    first the pair for ``a`` itself, then the pair of every other agent in
    ascending id order with ``a`` skipped::

        [self_prox, self_prev_gsp,
         other_0_prox, other_0_prev_gsp, ..., other_{n-2}_prox, other_{n-2}_prev_gsp]

    That is ``2 * n_agents`` filled slots in a zero-initialised array of
    length ``self.gsp_network_input``. Because the layout depends on the
    number of agents, the vector length is tied to team size.

    Side effect: for every agent, the oldest entry of
    ``self.gsp_observation[agent]`` is dropped and the new vector is
    appended, so the per-agent history keeps a constant length. The array
    stored in the history is the same object that is returned.

    Args:
        agent_prox_values: per-agent prox value, indexable by agent id.
        agent_prev_gsp: per-agent previous GSP value, indexable by agent id.

    Returns:
        A list with one ``numpy`` array per agent, in agent-id order.
    """
    team = range(self._n_agents)
    views = []
    for me in team:
        view = np.zeros(self.gsp_network_input)
        # Self occupies the first pair; everyone else follows in id order.
        ordering = [me] + [peer for peer in team if peer != me]
        for slot, member in enumerate(ordering):
            view[2 * slot] = agent_prox_values[member]
            view[2 * slot + 1] = agent_prev_gsp[member]
        # Slide this agent's observation history forward by one step.
        history = self.gsp_observation[me]
        history.pop(0)
        history.append(view)
        views.append(view)
    return views

def make_gsp_states(self, agent_prox_values, agent_prev_gsp, return_prox_flags = False):
states = []
prox_flags = []
Expand Down Expand Up @@ -242,7 +294,11 @@ def choose_agent_action(self, observation, failures, test=False):
return actions, action_num

def choose_agent_gsp(self, agent_gsp_states, test = False):
if self._neighbors:
if self._neighbors or self._broadcast:
# Per-agent predictions with self-centric inputs. GSP-N (neighbors)
# and GSP-B (broadcast) share the same per-agent forward-pass shape;
# only the input vector differs. Non-recurrent broadcast uses the
# same stateless path as non-recurrent neighbors.
actions = []
for i in range(self._n_agents):
if self.recurrent_gsp:
Expand All @@ -257,7 +313,7 @@ def choose_agent_gsp(self, agent_gsp_states, test = False):
)
# Take the last timestep's action
actions.append(action_tensor[-1].cpu().detach().numpy())
else:
else:
actions.append(self.choose_action(agent_gsp_states[i], self.gsp_networks, test))
return actions
else:
Expand Down
123 changes: 123 additions & 0 deletions tests/test_agent/test_gsp_broadcast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Tests for GSP-B (full-broadcast variant) state construction.

GSP-B: each agent's input is [self_prox, self_prev_gsp, other_0_prox,
other_0_prev_gsp, other_1_prox, other_1_prev_gsp, ..., other_{n-2}_prox,
other_{n-2}_prev_gsp], length 2*n_agents (one self pair plus n_agents - 1
"other" pairs). Self-first ordering.

Known limitation (inherited from plain GSP): the network input size is
coupled to n_agents, so a trained GSP-B policy only transfers to the same
team size. This is the tradeoff vs GSP-N's fixed (self + n_hop_neighbors)
input which transfers across team sizes.
"""

import numpy as np
import pytest

from src.agent import Agent


# Hyperparameter dictionary handed to ``Agent(config=...)`` by the helpers
# and tests in this module.
BASE_CONFIG = {
    "GAMMA": 0.99,
    "TAU": 0.005,
    "ALPHA": 0.001,
    "BETA": 0.002,
    "LR": 0.0001,
    "EPSILON": 0.0,
    "EPS_MIN": 0.0,
    "EPS_DEC": 0.0,
    "BATCH_SIZE": 16,
    "MEM_SIZE": 1000,
    "REPLACE_TARGET_COUNTER": 10,
    "NOISE": 0.0,
    "UPDATE_ACTOR_ITER": 1,
    "WARMUP": 0,
    "GSP_LEARNING_FREQUENCY": 1,
    "GSP_BATCH_SIZE": 16,
}


def make_agent(n_agents=4, network="DDQN", broadcast=True):
    """Construct an ``Agent`` with GSP enabled for the tests in this module.

    Only the team size, the network name and the ``broadcast`` switch vary;
    every other constructor argument is fixed. ``neighbors`` is always
    ``False`` here.
    """
    core_kwargs = dict(
        config=BASE_CONFIG,
        network=network,
        n_agents=n_agents,
        n_obs=8,
        n_actions=4,
        options_per_action=3,
        id=0,
        min_max_action=1.0,
        meta_param_size=1,
    )
    gsp_kwargs = dict(
        gsp=True,
        recurrent=False,
        attention=False,
        neighbors=False,
        broadcast=broadcast,
        gsp_input_size=4,  # overridden when broadcast=True
        gsp_output_size=1,
        gsp_min_max_action=1.0,
        gsp_look_back=2,
        gsp_sequence_length=5,
    )
    return Agent(**core_kwargs, **gsp_kwargs)


def test_broadcast_agent_has_gsp_broadcast_property_true():
    """An agent built with broadcast=True reports ``gsp_broadcast`` as True."""
    flag = make_agent().gsp_broadcast
    assert flag is True


def test_broadcast_agent_gsp_input_size_is_two_times_n_agents():
    """With 4 agents the broadcast input is one self pair plus 3 other pairs = 8."""
    four_agent_team = make_agent(n_agents=4)
    observed_size = four_agent_team.gsp_network_input
    assert observed_size == 8


def test_broadcast_agent_gsp_input_size_scales_with_n_agents():
    """With 8 agents the input size is 16: it grows with the team size."""
    eight_agent_team = make_agent(n_agents=8)
    observed_size = eight_agent_team.gsp_network_input
    assert observed_size == 16


def test_make_gsp_states_broadcast_returns_one_state_per_agent():
    """Four agents yield four state vectors, each of length 8."""
    agent = make_agent(n_agents=4)
    states = agent.make_gsp_states_broadcast(
        [0.1, 0.2, 0.3, 0.4],
        [-0.5, 0.0, 0.25, 0.75],
    )
    assert len(states) == 4
    assert all(len(state) == 8 for state in states)


def test_make_gsp_states_broadcast_self_first_ordering():
    """Each agent's vector starts with its own (prox, prev_gsp) pair."""
    prox = [0.11, 0.22, 0.33, 0.44]
    prev_gsp = [-0.1, -0.2, -0.3, -0.4]
    states = make_agent(n_agents=4).make_gsp_states_broadcast(prox, prev_gsp)
    for i, (own_prox, own_prev_gsp) in enumerate(zip(prox, prev_gsp)):
        leading_pair = states[i]
        assert leading_pair[0] == pytest.approx(own_prox), f"agent {i} self_prox"
        assert leading_pair[1] == pytest.approx(own_prev_gsp), f"agent {i} self_prev_gsp"


def test_make_gsp_states_broadcast_others_in_order():
    """After the self pair, the other agents follow in ascending id order, self skipped."""
    agent = make_agent(n_agents=4)
    states = agent.make_gsp_states_broadcast(
        [0.10, 0.20, 0.30, 0.40],
        [0.01, 0.02, 0.03, 0.04],
    )
    # Keyed by agent id; the "others" for each are listed in the comment.
    expected_by_agent = {
        0: [0.10, 0.01, 0.20, 0.02, 0.30, 0.03, 0.40, 0.04],  # others = [1, 2, 3]
        2: [0.30, 0.03, 0.10, 0.01, 0.20, 0.02, 0.40, 0.04],  # others = [0, 1, 3]
        3: [0.40, 0.04, 0.10, 0.01, 0.20, 0.02, 0.30, 0.03],  # others = [0, 1, 2]
    }
    for agent_id, expected in expected_by_agent.items():
        assert list(states[agent_id]) == pytest.approx(expected)


def test_broadcast_is_mutually_exclusive_with_neighbors():
    """Requesting neighbors=True and broadcast=True together raises ValueError.

    The expectation is pinned to ``ValueError`` and to the "mutually
    exclusive" wording of the constructor's message. Accepting
    ``AssertionError`` as well would let any unrelated ``assert`` that fails
    during construction satisfy this test.
    """
    with pytest.raises(ValueError, match="mutually exclusive"):
        Agent(
            config=BASE_CONFIG,
            network="DDQN",
            n_agents=4,
            n_obs=8,
            n_actions=4,
            options_per_action=3,
            id=0,
            min_max_action=1.0,
            meta_param_size=1,
            gsp=True,
            recurrent=False,
            attention=False,
            neighbors=True,
            broadcast=True,
            gsp_input_size=4,
            gsp_output_size=1,
            gsp_min_max_action=1.0,
            gsp_look_back=2,
            gsp_sequence_length=5,
        )


def test_plain_gsp_without_broadcast_unchanged():
    """With neighbors=False and broadcast=False the configured input size is kept."""
    plain_agent = make_agent(broadcast=False)
    # No variant overrides the size, so the gsp_input_size=4 passed in applies.
    assert plain_agent.gsp_network_input == 4
    assert plain_agent.gsp_broadcast is False
Loading