Automatically cropping an image with python/PIL

2019-01-17 02:07发布

问题:

Can anyone help me figure out what's happening in my image auto-cropping script? I have a png image with a large transparent area/space. I would like to be able to automatically crop that space out and leave the essentials. Original image has a squared canvas, optimally it would be rectangular, encapsulating just the molecule.

here's the original image:

Doing some googling i came across PIL/python code that was reported to work, however in my hands, running the code below over-crops the image.

import Image
import sys

image=Image.open('L_2d.png')
image.load()

imageSize = image.size
imageBox = image.getbbox()

imageComponents = image.split()

rgbImage = Image.new("RGB", imageSize, (0,0,0))
rgbImage.paste(image, mask=imageComponents[3])
croppedBox = rgbImage.getbbox()
print imageBox
print croppedBox
if imageBox != croppedBox:
    cropped=image.crop(croppedBox)
    print 'L_2d.png:', "Size:", imageSize, "New Size:",croppedBox
    cropped.save('L_2d_cropped.png')

the output is this:

Can anyone more familiar with image-processing/PLI can help me figure out the issue?

回答1:

You can use numpy, convert the image to array, find all non-empty columns and rows and then create an image from these:

import Image
import numpy as np

image=Image.open('L_2d.png')
image.load()

image_data = np.asarray(image)
image_data_bw = image_data.max(axis=2)
non_empty_columns = np.where(image_data_bw.max(axis=0)>0)[0]
non_empty_rows = np.where(image_data_bw.max(axis=1)>0)[0]
cropBox = (min(non_empty_rows), max(non_empty_rows), min(non_empty_columns), max(non_empty_columns))

image_data_new = image_data[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :]

new_image = Image.fromarray(image_data_new)
new_image.save('L_2d_cropped.png')

The result looks like

If anything is unclear, just ask.



回答2:

For me it works as:

import Image

image=Image.open('L_2d.png')

imageBox = image.getbbox()
cropped=image.crop(imageBox)
cropped.save('L_2d_cropped.png')

When you search for boundaries by mask=imageComponents[3], you search only by blue channel.



回答3:

I tested most of the answers replied in this post, however, I was ended up my own answer. I used anaconda python3.

from PIL import Image, ImageChops

def trim(im):
    bg = Image.new(im.mode, im.size, im.getpixel((0,0)))
    diff = ImageChops.difference(im, bg)
    diff = ImageChops.add(diff, diff, 2.0, -100)
    bbox = diff.getbbox()
    if bbox:
        return im.crop(bbox)

if __name__ == "__main__":
    bg = Image.open("test.jpg") # The image to be cropped
    new_im = trim(bg)
    new_im.show()


回答4:

Here's another version using pyvips.

This one is a little fancier: it looks at the pixel at (0, 0), assumes that to be the background colour, then does a median filter and finds the first and last row and column containing a pixel which differs from that by more than a threshold. This extra processing means it also works on photographic or compressed images, where a simple trim can be thrown off by noise or compression artifacts.

import sys
import pyvips

# An equivalent of ImageMagick's -trim in libvips ... automatically remove
# "boring" image edges.

# We use .project to sum the rows and columns of a 0/255 mask image, the first
# non-zero row or column is the object edge. We make the mask image with an
# amount-differnt-from-background image plus a threshold.

im = pyvips.Image.new_from_file(sys.argv[1])

# find the value of the pixel at (0, 0) ... we will search for all pixels 
# significantly different from this
background = im(0, 0)

# we need to smooth the image, subtract the background from every pixel, take 
# the absolute value of the difference, then threshold
mask = (im.median(3) - background).abs() > 10

# sum mask rows and columns, then search for the first non-zero sum in each
# direction
columns, rows = mask.project()

# .profile() returns a pair (v-profile, h-profile) 
left = columns.profile()[1].min()
right = columns.width - columns.fliphor().profile()[1].min()
top = rows.profile()[0].min()
bottom = rows.height - rows.flipver().profile()[0].min()

# and now crop the original image

im = im.crop(left, top, right - left, bottom - top)

im.write_to_file(sys.argv[2])

Here it is running on an 8k x 8k pixel NASA earth image:

$ time ./trim.py /data/john/pics/city_lights_asia_night_8k.jpg x.jpg
real    0m1.868s
user    0m13.204s
sys     0m0.280s
peak memory: 100mb

Before:

After:

There's a blog post with some more discussion here.



回答5:

Came across this post recently and noticed the PIL library has changed. I re-implemented this with openCV:

import cv2

def crop_im(im, padding=0.1):
    """
    Takes cv2 image, im, and padding % as a float, padding,
    and returns cropped image.
    """
    bw = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    rows, cols = bw.shape
    non_empty_columns = np.where(bw.min(axis=0)<255)[0]
    non_empty_rows = np.where(bw.min(axis=1)<255)[0]
    cropBox = (min(non_empty_rows) * (1 - padding),
                min(max(non_empty_rows) * (1 + padding), rows),
                min(non_empty_columns) * (1 - padding),
                min(max(non_empty_columns) * (1 + padding), cols))
    cropped = im[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :]

    return cropped

im = cv2.imread('testimage.png')
cropped = crop_im(im)
cv2.imshow('', cropped)
cv2.waitKey(0)


回答6:

I know that this post is old but, for some reason, none of the suggested answers worked for me. So I hacked my own version from existing answers:

import Image
import numpy as np
import glob
import shutil
import os

grey_tolerance = 0.7 # (0,1) = crop (more,less)

f = 'test_image.png'
file,ext = os.path.splitext(f)

def get_cropped_line(non_empty_elms,tolerance,S):
    if (sum(non_empty_elms) == 0):
        cropBox = ()
    else:
        non_empty_min = non_empty_elms.argmax()
        non_empty_max = S - non_empty_elms[::-1].argmax()+1
        cropBox = (non_empty_min,non_empty_max)
    return cropBox

def get_cropped_area(image_bw,tol):
    max_val = image_bw.max()
    tolerance = max_val*tol
    non_empty_elms = (image_bw<=tolerance).astype(int)
    S = non_empty_elms.shape
    # Traverse rows
    cropBox = [get_cropped_line(non_empty_elms[k,:],tolerance,S[1]) for k in range(0,S[0])]
    cropBox = filter(None, cropBox)
    xmin = [k[0] for k in cropBox]
    xmax = [k[1] for k in cropBox]
    # Traverse cols
    cropBox = [get_cropped_line(non_empty_elms[:,k],tolerance,S[0]) for k in range(0,S[1])]
    cropBox = filter(None, cropBox)
    ymin = [k[0] for k in cropBox]
    ymax = [k[1] for k in cropBox]
    xmin = min(xmin)
    xmax = max(xmax)
    ymin = min(ymin)
    ymax = max(ymax)
    ymax = ymax-1 # Not sure why this is necessary, but it seems to be.
    cropBox = (ymin, ymax-ymin, xmin, xmax-xmin)
    return cropBox

def auto_crop(f,ext):
    image=Image.open(f)
    image.load()
    image_data = np.asarray(image)
    image_data_bw = image_data[:,:,0]+image_data[:,:,1]+image_data[:,:,2]
    cropBox = get_cropped_area(image_data_bw,grey_tolerance)
    image_data_new = image_data[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :]
    new_image = Image.fromarray(image_data_new)
    f_new = f.replace(ext,'')+'_cropped'+ext
    new_image.save(f_new)


回答7:

This is an improvement over snew's reply, which works for transparent background. With mathematical morphology we can make it work on white background (instead of transparent), with the following code:

from PIL import Image
from skimage.io import imread
from skimage.morphology import convex_hull_image
im = imread('L_2d.jpg')
plt.imshow(im)
plt.title('input image')
plt.show()
# create a binary image
im1 = 1 - rgb2gray(im)
threshold = 0.5
im1[im1 <= threshold] = 0
im1[im1 > threshold] = 1
chull = convex_hull_image(im1)
plt.imshow(chull)
plt.title('convex hull in the binary image')
plt.show()
imageBox = Image.fromarray((chull*255).astype(np.uint8)).getbbox()
cropped = Image.fromarray(im).crop(imageBox)
cropped.save('L_2d_cropped.jpg')
plt.imshow(cropped)
plt.show()