4WayBeacon A2C Script Review
# --- Transition buffers ------------------------------------------------------
# Start empty (zero rows); one row per step is appended with np.vstack below.
states = np.empty(shape=[0, 2])        # 2 values per state
actions_list = np.empty(shape=[0, 4])  # one-hot over 4 actions
next_states = np.empty(shape=[0, 2])
rewards = np.empty(shape=[0, 1])

# --- State extraction --------------------------------------------------------
# `obs` comes from elsewhere in the script.
# NOTE(review): layer 5 of feature_screen is presumably pysc2's player_relative
# layer (1 = own unit, 3 = neutral/beacon) -- confirm against pysc2 features.
marine_y, marine_x = (obs[0].observation.feature_screen.base[5] == 1).nonzero()
beacon_y, beacon_x = (obs[0].observation.feature_screen.base[5] == 3).nonzero()

# Collapse each pixel mask to its centroid.
# NOTE(review): if a mask has no matching pixels, np.mean of the empty
# coordinate array yields nan, which then flows into `state`.
marine_x = np.mean(marine_x)
marine_y = np.mean(marine_y)
beacon_x = np.mean(beacon_x)
beacon_y = np.mean(beacon_y)

# State is the marine-minus-beacon offset on each axis, scaled by 10/63.
# NOTE(review): 63 is presumably the largest screen coordinate (64x64) -- confirm.
state = [
    marine_x*10/63 - beacon_x*10/63,
    marine_y*10/63 - beacon_y*10/63,
]

# --- Episode termination and reward ------------------------------------------
# `global_step`, `distance` and `reward` come from elsewhere in the script.
# The episode ends at step 200 or once the distance drops below 0.3.
if global_step == 200 or distance < 0.3:
    done = True
if global_step == 200:
    reward = -1
# Checked after the step-limit case, so when both conditions hold the
# reward ends up 0 rather than -1.
if distance < 0.3:
    reward = 0

# --- Record the transition ---------------------------------------------------
# `next_state` and `act` come from elsewhere in the script.
states = np.vstack([states, state])
next_states = np.vstack([next_states, next_state])
rewards = np.vstack([rewards, reward])

# Store the chosen action index `act` as a one-hot row.
action = np.zeros(4)
action[act] = 1
actions_list = np.vstack([actions_list, action])
Last updated