diff --git a/ocr.sh b/ocr.sh index e206b0b..344b837 100755 --- a/ocr.sh +++ b/ocr.sh @@ -2,4 +2,32 @@ INPUT_IMAGE=$1 -tesseract $INPUT_IMAGE stdout + +raw_text=$(tesseract $INPUT_IMAGE stdout) + +referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*') +to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b') +date=$(echo "$raw_text" | grep -oE '\b[0-3][0-9]/[0-1][0-9]/[0-9]{4}\b') +time=$(echo "$raw_text" | grep -oE '\b[0-2][0-9]:[0-5][0-9]\b') +currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}') +amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})') + +json_output=$(cat <