diff --git a/ocr.sh b/ocr.sh index 96da32f..7957bf3 100755 --- a/ocr.sh +++ b/ocr.sh @@ -1,8 +1,9 @@ #!/bin/bash INPUT_IMAGE=$1 +MAGICK_ARGS="$INPUT_IMAGE -resize 150% -type Grayscale -threshold 95% " -raw_text=$(tesseract $INPUT_IMAGE stdout --psm 6) +raw_text=$(convert $MAGICK_ARGS - | tesseract stdin stdout| grep -v '^$') referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*') to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b') @@ -15,7 +16,7 @@ amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9] status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+') remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //') from=$(echo "$raw_text" | grep From | sed 's/From //') -to_name=$(echo "$raw_text"| grep -B1 $to_number | grep -v $to_number) +to_name=$(echo "$raw_text"| grep -B1 $to_number 2>/dev/null| grep -v $to_number 2>/dev/null) json=$(cat <