This is a letter recognition task with 284 images and 19 classes. I want to apply a naive Bayes classifier. First I have to convert each image to a feature vector, and to remove redundant information I should use some feature selection step, such as cropping the images to remove the extra black borders. However, I'm not very experienced in Python.
How can I crop the black space in the images in order to decrease the size of the CSV file (the rows currently have more columns than expected)? And how can I resize the images so that they are all the same size?
from PIL import Image, ImageChops
from resize import trim
import numpy as np
import cv2
import os
import csv
#Useful function
def createFileList(myDir, format='.jpg'):
    """Collect the paths of all files under ``myDir`` ending with ``format``.

    The directory tree is walked bottom-up (sub-directories are visited
    before their parents). ``myDir`` is printed once before the walk starts.

    Args:
        myDir: Root directory to search recursively.
        format: Filename suffix to match (case-sensitive), e.g. ``'.jpg'``.

    Returns:
        A list of matching paths, each joined with the directory it was
        found in. Empty when nothing matches or ``myDir`` does not exist.
    """
    print(myDir)
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(myDir, topdown=False)
        for filename in filenames
        if filename.endswith(format)
    ]
# Convert every '.jpg' image found under 'image_ocr' into one CSV row of
# greyscale pixel values, appended to trainData.csv.
myFileList = createFileList('image_ocr')

# The CSV is opened once for the whole run instead of once per image.
# newline='' is what the csv module expects; without it, Windows output gets
# an empty line after every row. Append mode keeps rows from earlier runs.
with open("trainData.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    for image_path in myFileList:
        # The context manager releases the image's file handle as soon as the
        # greyscale copy has been made.
        with Image.open(image_path) as img_file:
            # 'L' is Pillow's 8-bit single-channel (greyscale) mode.
            img_grey = img_file.convert('L')

        # Flatten the (height, width) pixel grid row by row into one vector.
        # The builtin int replaces the np.int alias, which was removed in
        # NumPy 1.24.
        # NOTE: each row has width * height values, so images of different
        # sizes produce rows of different lengths.
        value = np.asarray(img_grey, dtype=int).ravel()
        writer.writerow(value)