-->

Django: Validate file type of uploaded file

2019-01-16 11:48发布

问题:

I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.

Here's the UploadedFile model:

class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )

    status = models.SmallIntegerField(choices=STATES,
        default=0, blank=True, null=True) 
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        return self.file.name

    def name(self):
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        os.remove(self.file.path)
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        return u'/upload/projects/%d' % self.id

    def clean(self):
        if not "XML" in magic.from_file(self.file.url):
            raise ValidationError(u'Not an xml file.')

class UploadedFileForm(forms.ModelForm):
    class Meta:                
        model = UploadedFile
        exclude = ('project',)

回答1:

Validate files is a regular issue, so i would like to use a validators:

from django.utils.deconstruct import deconstructible
from django.template.defaultfilters import filesizeformat
import magic

@deconstructible
class FileValidator(object):
    error_messages = {
     'max_size': ("Ensure this file size is not greater than %(max_size)s."
                  " Your file size is %(size)s."),
     'min_size': ("Ensure this file size is not less than %(min_size)s. "
                  "Your file size is %(size)s."),
     'content_type': "Files of type %(content_type)s are not supported.",
    }

    def __init__(self, max_size=None, min_size=None, content_types=()):
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size), 
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                   'max_size', params)

        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.mix_size),
                'size': filesizeformat(data.size)
            }
            raise ValidationError(self.error_messages['min_size'], 
                                   'min_size', params)

        if self.content_types:
            content_type = magic.from_buffer(data.read(), mime=True)
            if content_type not in self.content_types:
                params = { 'content_type': content_type }
                raise ValidationError(self.error_messages['content_type'],
                                   'content_type', params)

    def __eq__(self, other):
        return isinstance(other, FileValidator)

Then you can use FileValidator in your model.FileField or forms.FileField as follows:

validate_file = FileValidator(max_size=1024 * 100, 
                             content_types=('application/xml',))
file = models.FileField(upload_to=settings.XML_ROOT, 
                        validators=[validate_file])


回答2:

For posterity: the solution is to use the read method and pass that to magic.from_buffer.

class UploadedFileForm(ModelForm):
    def clean_file(self):
        file = self.cleaned_data.get("file", False)
        filetype = magic.from_buffer(file.read())
        if not "XML" in filetype:
            raise ValidationError("File is not XML.")
        return file

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)


回答3:

From django 1.11, you can also use FileExtensionValidator.

from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
    file = models.FileField(upload_to=settings.XML_ROOT, 
        validators=[FileExtensionValidator(allowed_extensions=['xml'])])

Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.

ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator



回答4:

I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.

Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.

If you need to consider the security of the system when dealing with uploads

  • Make sure uploaded file has correct extension

  • Make sure the mimetype matches the file extension

In the case you are worried about user's uploading exploit files (for attacking against your site)

  • Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)

  • Make sure you use content-disposition header on download

Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/

Below is my example how I sanitize the uploaded images:

class Example(models.Model):
    image = models.ImageField(upload_to=filename_gen("participant-images/"), blank=True, null=True)


class Example(forms.ModelForm):
    def clean_image(self):
        """ Clean the uploaded image attachemnt.
        """
        image = self.cleaned_data.get('image', False)
        utils.ensure_safe_user_image(image)
        return image


def ensure_safe_user_image(image):
    """ Perform various checks to sanitize user uploaded image data.

    Checks that image was valid header, then

    :param: InMemoryUploadedFile instance (Django form field value)

    :raise: ValidationError in the case the image content has issues
    """

    if not image:
        return

    assert isinstance(image, InMemoryUploadedFile), "Image rewrite has been only tested on in-memory upload backend"

    # Make sure the image is not too big, so that PIL trashes the server
    if image:
        if image._size > 4*1024*1024:
            raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then do header peak what the image claims
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")

    doc_type = mime.split("/")[-1].upper()

    # Read data from cStringIO instance
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    buf = StringIO()
    pil_image.thumbnail((2048, 2048), Image.ANTIALIAS)
    pil_image.save(buf, doc_type)
    image.file = buf

    # Make sure the image has valid extension (can't upload .htm image)
    extension = unicode(doc_type.lower())
    if not image.name.endswith(u".%s" % extension):
        image.name = image.name + u"." + extension