Feature selection using python

2019-06-09 12:19发布

It's a letter recognition task with 284 images and 19 classes. I want to apply a naive Bayes classifier. First I have to convert each image to a feature vector, and to reduce redundant information I should apply some feature selection, such as cropping the images to remove the extra black borders. However, I'm not very experienced in Python.

How can I crop the black space in the images in order to decrease the size of the CSV file (the rows have more columns than expected)? And how can I resize the images so that they all have the same size?

from PIL import Image, ImageChops
from resize import trim
import numpy as np
import cv2
import os
import csv

#Useful function
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files below ``myDir`` whose name ends with ``format``.

    The directory tree is walked bottom-up (``topdown=False``), so files in
    sub-directories are listed before files in their parent directory.
    ``myDir`` is echoed to standard output before the walk starts.

    Args:
        myDir: Root directory to search.
        format: Filename suffix to match (case-sensitive), e.g. ``'.jpg'``.

    Returns:
        A list of paths, each built by joining the containing directory and
        the file name. Empty when nothing matches.
    """
    print(myDir)
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(myDir, topdown=False)
        for entry in entries
        if entry.endswith(format)
    ]

# Convert every image found under 'image_ocr' into one CSV row of greyscale
# pixel values, appended to trainData.csv.
myFileList = createFileList('image_ocr')

# Open the output once instead of once per image. newline='' is what the csv
# module expects for files it writes; without it, platforms that translate
# line endings (Windows) end up with a blank line after every row.
with open("trainData.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    for file in myFileList:
        # The context manager releases the image's file handle as soon as the
        # greyscale copy has been produced.
        with Image.open(file) as img_file:
            # 'L' is Pillow's 8-bit greyscale mode.
            img_grey = img_file.convert('L')

        # The builtin int replaces the np.int alias, which was deprecated in
        # NumPy 1.20 and removed in 1.24. Converting the image directly gives a
        # (height, width) array, so no getdata()/reshape round-trip is needed.
        value = np.asarray(img_grey, dtype=int).flatten()
        writer.writerow(value)

0条回答
登录 后发表回答