4WayBeacon A2C Code Review
a2c.py
Class Structure
└── Policy_net(Class)
├── __init__(Function)
├── learn(Function)
├── _build_net(Function)
└── choose_action(Function)

__init__(self, sess, exp_rate)
def __init__(self, sess, exp_rate):
    """Build the TF1 actor-critic graph: placeholders, networks, and both train ops.

    Args:
        sess: live tf.Session, stored for use by learn()/choose_action().
        exp_rate: entropy-bonus coefficient on the actor objective
            (encourages exploration).
    """
    self.sess = sess
    self.state_size = 2    # assumes a 2-component state vector — TODO confirm against env
    self.action_size = 4   # 4-way task: one discrete action per direction
    self.exp_rate = exp_rate

    # Graph inputs (batched along the first axis).
    self.X = tf.placeholder(tf.float32, [None, self.state_size])   # states
    self.a = tf.placeholder(tf.float32, [None, self.action_size])  # one-hot actions taken
    self.r = tf.placeholder(tf.float32, [None, 1])                 # immediate rewards
    self.v_ = tf.placeholder(tf.float32, [None, 1])                # V(s'), fed from a prior forward pass

    # NOTE(review): original read `self._bulid_net()`; the class outline lists
    # the method as `_build_net`, so the misspelling is corrected here.
    self.actor, self.critic = self._build_net()

    # Critic: minimize squared TD error, r + gamma * V(s') - V(s), gamma = 0.99.
    self.td_error = self.r + 0.99 * self.v_ - self.critic
    self.closs = tf.square(self.td_error)
    self.train_cop = tf.train.AdamOptimizer(0.0001).minimize(self.closs)

    # Actor: policy-gradient objective E[sum_a a_onehot * log pi(a|s) * td_error]
    # plus an entropy bonus scaled by exp_rate; train op minimizes the negation.
    self.log_lik = self.a * tf.log(self.actor)
    self.log_lik_adv = self.log_lik * self.td_error
    self.exp_v = tf.reduce_mean(tf.reduce_sum(self.log_lik_adv, axis=1))
    self.entropy = -tf.reduce_sum(self.actor * tf.log(self.actor))
    self.obj_func = self.exp_v + self.exp_rate * self.entropy
    self.loss = -self.obj_func
    self.train_aop = tf.train.AdamOptimizer(0.0001).minimize(self.loss)

# --- next doc section: learn(self, state, next_state, reward, action) ---
_build_net(self)
choose_action(self, s)
Last updated