Validator not saving images

43 views
Skip to first unread message

mostwanted

unread,
Mar 14, 2023, 4:55:11 PM3/14/23
to web2py-users
I have a little problem with my validator code, its saving 0 byte images and I just don't know why, the value variable is an image that actually contains an image, I have tested it several times to see what it contains and its an image but  for some reason when it comes to saving it in the database it saves an empty image, please assist me where I could be going wrong. Regards

from gluon import *
import pytesseract
from PIL import Image

class IS_RECEIPT_OR_INVOICE(object):
    def __init__(self, error_message="The uploaded file is not a receipt or invoice!"):
        self.error_message = error_message

    def __call__(self, value):
        error = None
        try:
            image = Image.open(value.file)
            text = pytesseract.image_to_string(image)
            if "receipt" not in text.lower() and "invoice" not in text.lower() and "RECEIPT" not in text and "INVOICE" not in text:
                error = self.error_message
        except:
            error = self.error_message
        return (value, error)

Leonel Câmara

unread,
Mar 17, 2023, 11:22:07 AM3/17/23
to web2py-users
What's happening here is that your validator is reading the file and it's not "rewinding" it afterwards so when the next step gets it the file descriptor is at the end of the file hence the ' bytes size.

I would recommend rewriting it to something like this (the important part being the value.file.seek(0)) note that I haven't tested this code at all.

import pytesseract
from PIL import Image
from pydal.validators import Validator, ValidationError

class IMG_HAS_TEXT(Validator):

    def __init__(self, check_tokens=None, error_message="Image doesn't have the required text"):
        self.error_message = error_message
        self.check_tokens = check_tokens or []

    def validate(self, value, record_id=None):
        try:
            image = Image.open(value.file)
            value.file.seek(0)
            text = pytesseract.image_to_string(image).lower()
            if not text or not all(token in text for token in self.check_tokens):
                raise ValidationError(self.translator(self.error_message))
            return value
        except:
            raise ValidationError(self.translator(self.error_message))

class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT):

    def __init__(self, error_message="The uploaded file is not a receipt or invoice!"):
        self.error_message = error_message
        self.check_tokens = ("receipt", "invoice")

mostwanted

unread,
Mar 18, 2023, 3:20:04 AM3/18/23
to web2py-users
Hey Leonel, thanks for your reply but the above code is still saving empty (0byte) images

Leonel Câmara

unread,
Mar 20, 2023, 1:13:41 PM3/20/23
to web2py-users
Like I said I didn't test it :) apparently you need to seek to zero after tesseract and not after image.open as tesseract is reading it. Fixed version (which I actually tested):

# -*- coding: utf-8 -*-

import pytesseract
from PIL import Image
from pydal.validators import Validator, ValidationError

class IMG_HAS_TEXT(Validator):

    def __init__(self, check_tokens=None, error_message="Image doesn't have the required text"):
        self.error_message = error_message
        self.check_tokens = check_tokens or []

    def validate(self, value, record_id=None):
        try:
            image = Image.open(value.file)            
            text = pytesseract.image_to_string(image).lower()
            value.file.seek(0)

            if not text or not all(token in text for token in self.check_tokens):
                raise ValidationError(self.translator(self.error_message))
            return value
        except Exception as e:

            raise ValidationError(self.translator(self.error_message))

class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT):

    def __init__(self, error_message="The uploaded file is not a receipt or invoice!"):
        self.error_message = error_message
        self.check_tokens = ("receipt", "invoice")
Reply all
Reply to author
Forward
0 new messages