Tensorflow: using a FIFO queue for code running on

2019-09-10 20:29发布

问题:

The code below shows my attempt to run an algorithm on single GPUs and feed data to it using a FIFO queue. The data exists in a CSV file. I use a separate python thread to read from the file one line at a time and enqueue the line into a FIFO.

N = 16
num_ckfs =80000

q = [0.01 for i in range(N)]
q_ckfs = np.array([q for i in range(num_ckfs)])

r = [5]
r_ckfs = np.array([r for i in range(num_ckfs)])

init_var = [10.0 for i in range(N)]
init_var_ckfs = np.array([init_var for i in range(num_ckfs)])

init_state = [0.0 for i in range(N)]
init_state_ckfs = np.array([init_state for i in range(num_ckfs)])


class CKF(object):

    def __init__(self, num_ckfs, N):

            self.init_variances = tf.Variable(init_var_ckfs, name='init_variances', dtype=tf.float64)

            self.init_states = tf.Variable(init_state_ckfs, name='init_states', dtype=tf.float64)

            init_states_expanded = tf.expand_dims(self.init_states, 2) # num_ckfs X N X 1

            self.q_values = tf.constant(q_ckfs, name='q_values', dtype=tf.float64)
            self.r_values = tf.constant(r_ckfs, name='r_values', dtype=tf.float64)

            self.input_vectors = tf.placeholder(tf.float64, shape=[num_ckfs, N], name='input_vectors')

            self.z_k = tf.placeholder(tf.float64, shape=[num_ckfs, 1], name='z_k');

            q = tf.FIFOQueue(200, [tf.float64, tf.float64], shapes=[[num_ckfs,1], [num_ckfs,N]])
            self.enqueue_op = q.enqueue([self.z_k, self.input_vectors])

            observations, inputs = q.dequeue()
            #further processing using the input data

with tf.device('/gpu:0'):
  ckf_gpu0 = CKF(num_ckfs, N)


def load_and_enqueue():
        #read one line at a time
        #obvs_list corresponds to the first column
        #data_list corresponds to the rest of the columns
        session.run(ckf_gpu0.enqueue_op, feed_dict={
                                        ckf_gpu0.input_vectors: data_list[0], ckf_gpu0.z_k: obvs_list[0]})
     count += 1


t =  threading.Thread(target=load_and_enqueue)
t.start()

for i in range( num_rows):
    out = session.run([ckf_gpu0.projected_output ])

The first problem that I have run into is:

InvalidArgumentError (see above for traceback): Cannot assign a device to node 'fifo_queue': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.

Is there an alternate way to do such a thing, i.e. hide the I/O latency while the computation is being done on a GPU?