无法提高AlexNet在Oxford-102数据集上的精度(Tensorflow)(Cannot improve accuracy of AlexNet on Oxford-102 (Tensorflow))

2019-09-28 05:37发布

您好,我尝试在不使用预训练权重的情况下实现AlexNet。我试图在Oxford-102数据集上训练这个网络,但在整个训练过程中精度始终只有0.9%,修改参数也没有任何帮助。代码如下,有人可以帮我吗?

我参考的是这个教程

我把数据集给定的测试集(它更大)用作训练集,把给定的训练集用作测试集。我使用的是梯度下降优化器。

我构建AlexNet的方式与所给项目的做法基本相同,会不会是我计算精度的方式有问题?

下面是我加载数据的方式

import os
import sys
import warnings

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from skimage.io import imread
from skimage.transform import resize

from scipy.io import loadmat

import tensorflow as tf

# Silence UserWarning messages that originate from the skimage module.
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Load the split definition file; the keys read below are 'trnid' and 'tstid'.
set_ids = loadmat('setid.mat')

# Bare expression: only displays something when run as an interactive/notebook cell.
set_ids

# The splits are deliberately swapped: the ids stored under 'trnid' become the
# test set and the ids stored under 'tstid' become the training set (the
# accompanying text states that the official test split is the larger one).
test_ids = set_ids['trnid'].tolist()[0]
train_ids = set_ids['tstid'].tolist()[0]

def indexes_processing(int_list):
    '''
    Convert integer image ids to zero-padded strings of at least five digits.

    The resulting strings are interpolated into file names of the form
    'jpg/image_XXXXX.jpg' by the image loading code in this file.

    Ids below 10000 are left-padded with zeros to exactly five characters.
    Ids of 10000 and above are returned unpadded; the previous
    implementation prepended an extra '0' to them, yielding a six-character
    string that does not match the five-digit naming scheme.

    Args:
        int_list: Iterable of integer ids.

    Returns:
        A new list of strings, one per input id, in the same order.
    '''
    return ['{:05d}'.format(int(element)) for element in int_list]

# Zero-padded string versions of the ids, used to build image file names.
raw_train_ids = indexes_processing(train_ids)
raw_test_ids = indexes_processing(test_ids)

# Module-level containers. train_labels / test_labels are filled by
# ProcessImage.set_up_images; train_images / test_images are not used by the
# code visible in this file.
train_images = []
test_images = []
train_labels = []
test_labels = []

# Class label of every image, shifted by -1 so the values can be used as
# 0-based column indices in one_hot_encode.
# NOTE(review): presumably the stored labels are 1-based (1..102) — confirm.
image_labels = (loadmat('imagelabels.mat')['labels'] - 1).tolist()[0]

# Human-readable flower names.
# NOTE(review): presumably ordered so that labels[k] names 0-based class k — confirm.
# Some entries carry a trailing space or a '?'; they are kept exactly as given.
labels = ['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy', 'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy', 'gazania', 'azalea', 'water lily', 'rose', 'thorn apple', 'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium', 'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose', 'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily', 'hippeastrum ', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea', 'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower', 'trumpet creeper', 'blackberry lily']

# Bare expression: shows one entry when run as an interactive/notebook cell.
labels[16]

def one_hot_encode(labels, num_classes=102):
    '''
    One hot encode integer class labels as a numpy array of 0s and 1s.

    Args:
        labels: Sequence of integer class indices; each must be a valid
            column index for a row of width num_classes.
        num_classes: Number of columns in the encoded output. Defaults to
            102, the value that was previously hard-coded.

    Returns:
        A float numpy array of shape (len(labels), num_classes) in which
        row k contains a single 1 at column labels[k] and 0 elsewhere.
    '''
    out = np.zeros((len(labels), num_classes))
    for row, class_index in enumerate(labels):
        out[row, class_index] = 1
    return out

class ProcessImage():
    '''
    Hold the training/testing images and labels in memory and serve
    sequential training batches.

    The class reads the module-level names raw_train_ids, raw_test_ids,
    train_ids, test_ids, image_labels, train_labels and test_labels.
    '''

    def __init__(self):
        # Position of the next training sample handed out by next_batch.
        self.i = 0

        # Pre-allocated image buffers, filled in by set_up_images.
        # np.zeros defaults to float64.
        self.training_images = np.zeros((6149, 227, 227, 3))
        self.training_labels = None

        self.testing_images = np.zeros((1020, 227, 227, 3))
        self.testing_labels = None

    @staticmethod
    def _load_images(image_ids, destination):
        '''Read each 'jpg/image_<id>.jpg', resize it to 227x227 and store it
        in the matching row of destination.'''
        for row, image_id in enumerate(image_ids):
            img = imread('jpg/image_{}.jpg'.format(image_id))
            destination[row] = resize(img, (227, 227))

    def set_up_images(self):
        '''
        Load all training and testing images from disk and build the
        one-hot label lists.

        The module-level train_labels / test_labels lists are overwritten in
        place (rather than appended to), so calling this method more than
        once no longer duplicates the labels.
        '''
        print('Processing Training Images...')
        self._load_images(raw_train_ids, self.training_images)
        print('Done!')

        print('Processing Testing Images...')
        self._load_images(raw_test_ids, self.testing_images)
        print('Done!')

        print('Processing Training and Testing Labels...')
        encoded_labels = one_hot_encode(image_labels)
        # The ids are shifted by -1 to index encoded_labels from 0.
        train_labels[:] = [encoded_labels[train_id - 1] for train_id in train_ids]
        test_labels[:] = [encoded_labels[test_id - 1] for test_id in test_ids]
        self.training_labels = train_labels
        self.testing_labels = test_labels
        print('Done!')

    def next_batch(self, batch_size):
        '''
        Return the next (images, labels) slice of the training data.

        The final batch of a pass may contain fewer than batch_size samples.
        After it, the cursor restarts at 0 so that the following pass again
        covers every sample; the previous modulo wrap-around skipped the
        first few samples of each new pass.
        '''
        start = self.i
        end = start + batch_size
        x = self.training_images[start:end]
        y = self.training_labels[start:end]
        self.i = end if end < len(self.training_images) else 0
        return x, y

# Create the loader; this allocates the training and testing image buffers.
image_processor = ProcessImage()

# Read and resize every image from disk and build the one-hot label lists.
image_processor.set_up_images()

我的计算图

# Helper Functions for AlexNet
def init_weights(filter_height, filter_width, num_channels, num_filters, stddev=0.1):
    '''
    Create a trainable convolution kernel drawn from a truncated normal
    distribution.

    Args:
        filter_height: Kernel height.
        filter_width: Kernel width.
        num_channels: Number of input channels seen by the kernel.
        num_filters: Number of output channels.
        stddev: Standard deviation of the truncated normal initializer.
            Defaults to 0.1, the value that was previously hard-coded.

    Returns:
        A tf.Variable of shape
        [filter_height, filter_width, num_channels, num_filters].
    '''
    kernel_shape = [filter_height, filter_width, num_channels, num_filters]
    init_random_dist = tf.truncated_normal(kernel_shape, stddev=stddev)
    return tf.Variable(init_random_dist)

def init_bias(shape):
    '''
    Create a trainable bias variable of the given shape, with every entry
    initialized to 0.1.
    '''
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W, stride_y, stride_x, padding='SAME'):
    '''
    Apply tf.nn.conv2d to x with kernel W, using stride_y / stride_x as the
    strides of the two middle dimensions and a stride of 1 on the outer two.
    '''
    strides = [1, stride_y, stride_x, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding=padding)

def max_pool(x, filter_height, filter_width, stride_y, stride_x, padding='SAME'):
    '''
    Apply tf.nn.max_pool to x with a filter_height x filter_width window and
    the given strides on the two middle dimensions.
    '''
    window = [1, filter_height, filter_width, 1]
    step = [1, stride_y, stride_x, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding)

def conv_layer(input_x, filter_height, filter_width, num_channels, num_filters, stride_y, stride_x, padding='SAME', groups=1):
    '''
    Convolution (optionally grouped) followed by bias addition and ReLU.

    With groups == 1 the whole input is convolved with a single kernel.
    Otherwise the input and the kernel are each split into `groups` equal
    parts along axis 3, each pair is convolved separately, and the results
    are concatenated along axis 3.

    Returns:
        The ReLU-activated output tensor.
    '''
    strides = [1, stride_y, stride_x, 1]

    def _convolve(inputs, kernel):
        return tf.nn.conv2d(inputs, kernel, strides=strides, padding=padding)

    # Each kernel only sees num_channels / groups input channels.
    kernel = init_weights(filter_height, filter_width, int(num_channels/groups), num_filters)
    offsets = init_bias([num_filters])

    if groups != 1:
        input_parts = tf.split(axis=3, num_or_size_splits=groups, value=input_x)
        kernel_parts = tf.split(axis=3, num_or_size_splits=groups, value=kernel)
        partial_outputs = [
            _convolve(part, part_kernel)
            for part, part_kernel in zip(input_parts, kernel_parts)
        ]
        conv = tf.concat(axis=3, values=partial_outputs)
    else:
        conv = _convolve(input_x, kernel)

    biased = tf.nn.bias_add(conv, offsets)
    # Reshape the biased result to the dynamic shape of the convolution output.
    biased = tf.reshape(biased, tf.shape(conv))
    return tf.nn.relu(biased)

def lrn(x, radius, alpha, beta, bias=1.0):
    '''
    Apply tf.nn.local_response_normalization to x, passing radius as
    depth_radius and forwarding alpha, beta and bias unchanged.
    '''
    return tf.nn.local_response_normalization(
        x,
        depth_radius=radius,
        alpha=alpha,
        beta=beta,
        bias=bias,
    )

def fully_connected(input_layer, num_in, num_out, relu=True, stddev=0.1):
    '''
    Fully connected layer: input_layer * W + b, optionally followed by ReLU.

    Args:
        input_layer: 2-D input tensor whose second dimension is num_in.
        num_in: Number of input units.
        num_out: Number of output units.
        relu: When True (default) apply ReLU to the result; when False
            return the raw affine output.
        stddev: Standard deviation of the truncated normal used to
            initialize W. Defaults to 0.1, the value that was previously
            hard-coded.

    Returns:
        The output tensor of the layer.
    '''
    # NOTE(review): with a large num_in (e.g. 6*6*256) the default stddev of
    # 0.1 may produce very large pre-activations; callers can now pass a
    # smaller value — confirm what suits the network.
    W = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=stddev))
    b = init_bias([num_out])
    out = tf.nn.xw_plus_b(input_layer, W, b)
    return tf.nn.relu(out) if relu else out

def drop_out(x, keep_prob):
    '''
    Apply tf.nn.dropout to x, forwarding keep_prob unchanged.
    '''
    dropped = tf.nn.dropout(x, keep_prob=keep_prob)
    return dropped

# Graph inputs: a batch of 227x227 3-channel images, the matching one-hot
# targets over 102 classes, and the dropout keep probability.
x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
y_true = tf.placeholder(tf.float32, shape=[None, 102])
keep_prob = tf.placeholder(tf.float32)

# Create the graph
# The layers are built in order; each statement adds variables/ops to the
# default graph.

# 1st Layer: Conv (w ReLu) -> Lrn -> Pool
# Expected spatial size: 227 -> 55 (11x11 conv, stride 4, VALID) -> 27 (3x3 pool, stride 2, VALID).
conv_1 = conv_layer(x, filter_height=11, filter_width=11, num_channels=3, num_filters=96, stride_y=4, stride_x=4, padding='VALID')
norm_1 = lrn(conv_1, radius=2, alpha=1e-05, beta=0.75)
pool_1 = max_pool(norm_1, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
# Bare expression: shows the static shape when run as an interactive/notebook cell.
pool_1.get_shape()

# 2nd Layer: Conv (w ReLu) -> Lrn -> Pool
# Grouped convolution (2 groups, default SAME padding); expected spatial size 27 -> 13 after pooling.
conv_2 = conv_layer(pool_1, filter_height=5, filter_width=5, num_channels=96, num_filters=256, stride_y=1, stride_x=1, groups=2)
norm_2 = lrn(conv_2, radius=2, alpha=1e-05, beta=0.75)
pool_2 = max_pool(norm_2, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 3rd Layer: Conv (w ReLu)
conv_3 = conv_layer(pool_2, filter_height=3, filter_width=3, num_channels=256, num_filters=384, stride_y=1, stride_x=1)

# 4th Layer: Conv (w ReLu), 2 groups
conv_4 = conv_layer(conv_3, filter_height=3, filter_width=3, num_channels=384, num_filters=384, stride_y=1, stride_x=1, groups=2)

# 5th Layer: Conv (w ReLu) -> Pool
# 2 groups; expected spatial size 13 -> 6 after pooling, giving the 6*6*256 features flattened below.
conv_5 = conv_layer(conv_4, filter_height=3, filter_width=3, num_channels=384, num_filters=256, stride_y=1, stride_x=1, groups=2)
pool_5 = max_pool(conv_5, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
pool_6_flat = tf.reshape(pool_5, [-1, 6*6*256])
full_6 = fully_connected(pool_6_flat, 6*6*256, 4096)
full_6_dropout = drop_out(full_6, keep_prob)

# 7th Layer: FC (w ReLu) -> Dropout
full_7 = fully_connected(full_6_dropout, 4096, 4096)
full_7_dropout = drop_out(full_7, keep_prob)

# 8th Layer: FC and return unscaled activations
# relu=False so y_pred holds raw logits over the 102 classes.
y_pred = fully_connected(full_7_dropout, 4096, 102, relu=False)

损失函数和优化器

# Loss: softmax cross-entropy between the one-hot targets and the raw logits,
# averaged over the batch.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

# Plain gradient descent with a fixed learning rate of 0.01; `train` is the
# op that performs one update step on the loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cross_entropy)

# Op that initializes all global variables, and a Saver used to write checkpoints.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

运行会话

# Build the accuracy ops once, before the session starts. Creating them inside
# the training loop added a fresh set of nodes to the graph at every
# evaluation.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    sess.run(init)
    for i in range(15000):
        # One optimization step on the next batch of 128 training samples,
        # with dropout enabled (keep probability 0.5).
        batches = image_processor.next_batch(128)
        sess.run(train, feed_dict={x:batches[0], y_true:batches[1], keep_prob:0.5})

        # Every 1000 steps: report accuracy on the whole test set with dropout
        # disabled (keep probability 1.0) and write a checkpoint.
        if (i%1000 == 0):
            print('On Step {}'.format(i))
            print('Accuracy is: ')

            print(sess.run(acc, feed_dict={x:image_processor.testing_images, y_true:image_processor.testing_labels, keep_prob:1.0}))

            print('Saving model...')
            saver.save(sess, 'models/model_iter.ckpt', global_step=i)
            print('Saved at step: {}'.format(i))
            print('\n')
    print('Saving final model...')
    saver.save(sess, 'models/model_final.ckpt')
    print('Saved')

在全部15000次迭代中,我一遍又一遍地得到相同的精度0.00903922。无论我怎样调整参数都没有用,我甚至尝试把图像大小从224改为227,但得到的精度仍然是0.00903922。

Answer 1:

你计算准确率的方式看起来没什么问题,不过在每次循环里都重新定义它有点奇怪。

让我在意的是,你只训练了十步。你的训练集似乎包含6149张图像,而你每个批次用128张图像进行训练。这样做十次,你只看过约6000张图像中的1280张——这太少了,不足以看到精度的变化。

相反,你应该遍历全部训练数据——这大约需要48个训练步骤,也就是一个epoch(轮)——而且最好重复多次。epoch的确切数量取决于数据和网络等多种因素,但你至少应该训练10个epoch,也就是大约480个训练步骤。



文章来源: Cannot improve accuracy of AlexNet on Oxford-102 (Tensorflow)