NESTLab · jdbloom · Apr 10, 2026 · Apr 10, 2026
diff --git a/rl_code/Main.py b/rl_code/Main.py
@@ -13,7 +13,7 @@
 from struct import pack, unpack, Struct
 import numpy as np
 import math
-import copy
+
 import zmq
 import csv
 import os
@@ -53,6 +53,7 @@
 if args.model_path is not None:
     model_file_path = os.path.join(containing_folder, args.model_path)
 learning_scheme = config['LEARNING_SCHEME']
+learn_every = int(config.get('LEARN_EVERY', 1))
 port = str(config['PORT'])
 test_mode = args.test
 train_mode = not test_mode
@@ -173,7 +174,6 @@
         if not exp_done:
             time_steps = 0
 
-            object_positions = []
             agent_prox_flags = []
             last_object_heading = None
 
@@ -183,7 +183,6 @@
 
             # Receive initial observations from the environment
             env_observations, failures, rewards, stats, robot_stats, obj_stats = Utility.parse_msgs(msgs)
-            object_positions.append([obj_stats[0], obj_stats[1]])
             old_cyl_ang = obj_stats[5]
 
             if Utility.params['num_obstacles'] > 0:
@@ -312,10 +311,6 @@
                     elif Utility.params['use_gate'] == 1:
                         gate_stats = Utility.parse_gate_stats(msgs[7])
 
-                    old_object_positions = copy.deepcopy(object_positions)
-                    object_positions.append([obj_stats[0], obj_stats[1]])
-
-
                     ############################## gsp REWARD ##############################################
                     gsp_reward, label = calculate_gsp_reward(
                         config['GSP'], 
@@ -331,9 +326,9 @@
 
                     old_cyl_ang = obj_stats[5]
 
-                    old_agent_prox_flags = copy.deepcopy(agent_prox_flags)
-                    neighbors_old_heading_gsp = copy.deepcopy(old_heading_gsp)
-                    old_heading_gsp = copy.deepcopy(next_heading_gsp)
+                    old_agent_prox_flags = list(agent_prox_flags)
+                    neighbors_old_heading_gsp = old_heading_gsp.copy()
+                    old_heading_gsp = next_heading_gsp.copy()
 
                     new_agent_states = []
                     force_mags = []
@@ -500,11 +495,14 @@
                         r.append(rewards[i][0])
 
                     if train_mode and config['LEARNING_SCHEME'] != 'None':
-                        if args.independent_learning:
-                            for i in range(Utility.params['num_robots']):
-                                loss = models[i].learn()
+                        if time_steps % learn_every == 0:
+                            if args.independent_learning:
+                                for i in range(Utility.params['num_robots']):
+                                    loss = models[i].learn()
+                            else:
+                                loss = model.learn()
                         else:
-                            loss = model.learn()
+                            loss = 0
                     else:
                         loss = 0
 

diff --git a/run_baseline_experiments.py b/run_baseline_experiments.py
@@ -153,14 +153,15 @@ def make_config(exp_name, gsp, neighbors, num_obstacles, use_gate, gate_curricul
         "EPSILON": 1.0,
         "EPS_MIN": 0.01,
         "EPS_DEC": 0.0001,
-        "BATCH_SIZE": 64,
+        "BATCH_SIZE": 256,
         "MEM_SIZE": 100000,
         "REPLACE_TARGET_COUNTER": 1000,
         "NOISE": 0.1,
         "UPDATE_ACTOR_ITER": 2,
         "WARMUP": 1000,
         "GSP_LEARNING_FREQUENCY": 500,
-        "GSP_BATCH_SIZE": 128,
+        "LEARN_EVERY": 4,
+        "GSP_BATCH_SIZE": 256,
     }