How can I get dimensions of image without actually downloading it? Is it even possible?
I have a list of urls of images and I want to assign width and size to it.
I know there is a way of doing it locally (How to check dimensions of all images in a directory using python?), but I don't want to download all the images.
Following ed.'s suggestions, I edited the code and came up with the following. I am not sure whether it downloads the whole file or just a part of it (as I wanted).
This is based on ed's answer mixed with other things I found on the web. I ran into the same issue as grotos with .read(24). Download getimageinfo.py from here and download ReSeekFile.py from here.
import urllib2

# getimageinfo.py is the separately downloaded module referred to in the
# surrounding text; it has to be imported before getImageInfo can be called.
import getimageinfo

# Pass the open response object itself (not the downloaded bytes) to the parser.
imgdata = urllib2.urlopen(href)
image_type, width, height = getimageinfo.getImageInfo(imgdata)
Modify getimageinfo as such...
import ReseekFile
def getImageInfo(datastream):
datastream = ReseekFile.ReseekFile(datastream)
data = str(datastream.read(30))
#Skipping to jpeg
# handle JPEGs
elif (size >= 2) and data.startswith('\377\330'):
content_type = 'image/jpeg'
b = datastream.read(1)
while (b and ord(b) != 0xDA):
while (ord(b) != 0xFF): b = datastream.read(1)
while (ord(b) == 0xFF): b = datastream.read(1)
if (ord(b) >= 0xC0 and ord(b) <= 0xC3):
h, w = struct.unpack(">HH", datastream.read(4))
datastream.read(int(struct.unpack(">H", datastream.read(2))[0])-2)
b = datastream.read(1)
width = int(w)
height = int(h)
except struct.error:
except ValueError:
I found the solution on this site to work well:
import urllib
import ImageFile
def getsizes(uri):
    """Return ``(file_size, image_size)`` for the image at *uri*.

    ``file_size`` is the integer Content-Length reported by the server, or
    whatever falsy value the header lookup produced when it is absent.
    ``image_size`` is the ``(width, height)`` tuple reported by PIL, or
    ``None`` if the stream ended before PIL could identify the image.

    The body is read in 1024-byte chunks and reading stops as soon as the
    incremental parser has recognised the image.
    """
    # get file size *and* image size (None if not known)
    response = urllib.urlopen(uri)
    try:
        size = response.headers.get("content-length")
        if size:
            size = int(size)
        parser = ImageFile.Parser()
        while 1:
            data = response.read(1024)
            if not data:
                # End of stream: the parser never identified an image.
                break
            # Without feeding the chunk, parser.image can never become set.
            parser.feed(data)
            if parser.image:
                return size, parser.image.size
    finally:
        # Release the connection even on early return or error.
        response.close()
    return size, None
print getsizes("http://www.pythonware.com/images/small-yoyo.gif")
# (10965, (179, 188))
If you're willing to download the first 24 bytes of each file, then this function (mentioned in johnteslade's answer to the question you mention) will work out the dimensions.
That's probably the least downloading necessary to do the job you want.
import urllib2
# Read at most the first 24 bytes of the response body; `image_url` must be
# defined by the caller.
start = urllib2.urlopen(image_url).read(24)
Edit (1):
In the case of jpeg files it seems to need more bytes. You could edit the function so that instead of reading a StringIO.StringIO(data) it instead reads the file handle from urlopen. Then it will read exactly as much of the image as it needs to find out the width and height.
Since the getimageinfo.py mentioned above doesn't work in Python 3, Pillow is used instead.
Pillow can be found in pypi, or installed by using pip: pip install pillow
from io import BytesIO
from PIL import Image
import requests
# Sample image URLs to probe.
hrefs = [
    'https://farm4.staticflickr.com/3894/15008518202_b016d7d289_m.jpg',
    'https://farm4.staticflickr.com/3920/15008465772_383e697089_m.jpg',
    'https://farm4.staticflickr.com/3902/14985871946_86abb8c56f_m.jpg',
]
# Last byte offset requested through the HTTP Range header.
RANGE = 5000
for href in hrefs:
    # Request only the leading bytes of each image instead of the whole file.
    req = requests.get(href,headers={'User-Agent':'Mozilla5.0(Google spider)','Range':'bytes=0-{}'.format(RANGE)})
    im = Image.open(BytesIO(req.content))
    # Report the (width, height) read from the partial data; previously the
    # opened image was discarded without using its size.
    print(im.size)
I like this solution I found, which downloads chunks of the image until it can be recognized as an image file by PIL and then stops downloading. This ensures that enough of the image header gets downloaded to read the dimensions, but no more. (I found this here and here; I've adapted it for Python 3+.)
import urllib
from PIL import ImageFile
def getsizes(uri):
    """Return ``(file_size, image_size)`` for the image at *uri*.

    ``file_size`` is the integer Content-Length reported by the server, or
    ``None`` when the header is absent. ``image_size`` is the
    ``(width, height)`` tuple reported by PIL, or ``None`` if the stream
    ended before PIL could identify the image.

    The body is read in 1024-byte chunks and reading stops as soon as the
    incremental parser has recognised the image.
    """
    # A bare `import urllib` does not load the `urllib.request` submodule,
    # so import it explicitly here.
    import urllib.request

    # get file size *and* image size (None if not known)
    with urllib.request.urlopen(uri) as response:
        size = response.headers.get("content-length")
        if size:
            size = int(size)
        parser = ImageFile.Parser()
        while True:
            data = response.read(1024)
            if not data:
                # End of stream: the parser never identified an image.
                break
            # Without feeding the chunk, parser.image can never become set.
            parser.feed(data)
            if parser.image:
                return size, parser.image.size
    return size, None
It's not possible to do it directly, but there is a workaround. If the files are on your own server, implement an API endpoint that takes the image name as an argument and returns its size.
But if the files are on a different server, you have no other option but to download them.
Unfortunately I can't comment, so this is as an answer:
Use a get query with the header
"Range": "bytes=0-30"
And then simply use
If you use python's "requests", it's simply
# Ask the server for only bytes 0-30 of the image.
r = requests.get(image_url, headers={
    "Range": "bytes=0-30",
})
# get_image_info is the header-parsing helper this answer refers to; it is
# not defined in this snippet.
image_info = get_image_info(r.content)
This fixes ed.'s answer and doesn't have any other dependencies (like ReSeekFile.py).
My fixed "getimageInfo.py" works with Python 3.4+; try it, it works great!
import io
import struct
import urllib.request as urllib2
def _jpeg_dimensions(data):
    """Return ``(width, height)`` from the first SOF0-SOF3 segment, or None.

    *data* is the raw bytes of a JPEG file (or a leading portion of it).
    ``None`` is returned when the data is truncated or malformed, or when
    the scan/end marker is reached before any frame header.
    """
    end = len(data)
    pos = 2  # skip the SOI marker (FF D8)
    while pos < end:
        # Every segment starts with 0xFF; skip anything before it.
        pos = data.find(b'\xff', pos)
        if pos < 0:
            return None
        # A marker may be preceded by any number of 0xFF fill bytes.
        while pos < end and data[pos] == 0xFF:
            pos += 1
        if pos >= end:
            return None
        marker = data[pos]
        pos += 1
        if marker in (0xD9, 0xDA):
            # EOI / start of scan: no frame header can follow.
            return None
        if marker in (0x01, 0xD8) or 0xD0 <= marker <= 0xD7:
            # Stand-alone markers carry no length field or payload.
            continue
        if 0xC0 <= marker <= 0xC3:
            # Payload layout: length(2) precision(1) height(2) width(2).
            if pos + 7 > end:
                return None
            h, w = struct.unpack_from(">HH", data, pos + 3)
            return w, h
        if pos + 2 > end:
            return None
        (seg_len,) = struct.unpack_from(">H", data, pos)
        if seg_len < 2:
            # The length field counts its own two bytes; less is malformed.
            return None
        pos += seg_len
    return None


def getImageInfo(data):
    """Identify an image from its leading bytes and report its dimensions.

    Args:
        data: ``bytes`` holding the start of (or the whole of) a GIF, PNG
            or JPEG file.

    Returns:
        A ``(content_type, width, height)`` tuple. ``content_type`` is
        ``'image/gif'``, ``'image/png'``, ``'image/jpeg'`` or ``''`` when
        the format is not recognised. ``width`` and ``height`` are ``-1``
        when they could not be determined, e.g. because *data* ends before
        the JPEG frame header.
    """
    size = len(data)
    height = -1
    width = -1
    content_type = ''
    png_signature = b'\211PNG\r\n\032\n'

    # handle GIFs
    if (size >= 10) and data[:6] in (b'GIF87a', b'GIF89a'):
        content_type = 'image/gif'
        # Logical screen width/height, little-endian, follow the signature.
        width, height = struct.unpack("<HH", data[6:10])

    # See PNG 2. Edition spec (http://www.w3.org/TR/PNG/)
    # Bytes 0-7 are below, 4-byte chunk length, then 'IHDR'
    # and finally the 4-byte width, height
    elif ((size >= 24) and data.startswith(png_signature)
          and (data[12:16] == b'IHDR')):
        content_type = 'image/png'
        width, height = struct.unpack(">LL", data[16:24])

    # Maybe this is for an older PNG version.
    elif (size >= 16) and data.startswith(png_signature):
        content_type = 'image/png'
        width, height = struct.unpack(">LL", data[8:16])

    # handle JPEGs
    elif (size >= 2) and data.startswith(b'\377\330'):
        content_type = 'image/jpeg'
        dimensions = _jpeg_dimensions(data)
        if dimensions is not None:
            width, height = dimensions

    return content_type, width, height
#from PIL import Image
#import requests
#hrefs = ['http://farm4.staticflickr.com/3894/15008518202_b016d7d289_m.jpg','https://farm4.staticflickr.com/3920/15008465772_383e697089_m.jpg','https://farm4.staticflickr.com/3902/14985871946_86abb8c56f_m.jpg']
#RANGE = 5000
#for href in hrefs:
#req = requests.get(href,headers={'User-Agent':'Mozilla5.0(Google spider)','Range':'bytes=0-{}'.format(RANGE)})
#im = getImageInfo(req.content)
# A Range header needs a unit and a byte interval; a bare "5000" is not a
# valid range specification.
req = urllib2.Request("http://vn-sharing.net/forum/images/smilies/onion/ngai.gif", headers={"Range": "bytes=0-5000"})
# Read the returned bytes and parse them; the context manager closes the
# connection afterwards.
with urllib2.urlopen(req) as r:
    print(getImageInfo(r.read()))
#f = open("D:\\Pictures\\1.jpg", "rb")
# Output: >> ('image/gif', 50, 50)
Source code: http://code.google.com/p/bfg-pages/source/browse/trunk/pages/getimageinfo.py