This commit is contained in:
Shihaam Abdul Rahman 2024-08-03 05:28:37 +05:00
parent 4b196c578f
commit cdf994fd54
Signed by: shihaam
GPG Key ID: 6DA2E87EBC227636

9
ocr.sh
View File

@ -1,8 +1,9 @@
#!/bin/bash
# Runs OCR on the image given as the first argument and extracts individual
# fields from the recognized text with grep/sed.
# NOTE(review): this view is a diff rendering with the +/- markers stripped, so
# some adjacent lines are probably the old and new versions of one statement.

# Path of the image to process (first positional argument).
INPUT_IMAGE=$1
# Arguments for ImageMagick `convert`: input file, upscale to 150%, convert to
# grayscale, then apply a 95% threshold. The string is expanded unquoted below
# so that it word-splits into separate arguments.
# NOTE(review): an image path containing whitespace would be split as well.
MAGICK_ARGS="$INPUT_IMAGE -resize 150% -type Grayscale -threshold 95% "
# OCR directly on the unprocessed image with page segmentation mode 6.
# NOTE(review): likely the removed side of the diff; if both lines were
# present, the assignment below would overwrite this result.
raw_text=$(tesseract $INPUT_IMAGE stdout --psm 6)
# Preprocess with `convert`, writing the image to stdout ("-"), feed it to
# tesseract on stdin, and drop empty lines from the recognized text.
raw_text=$(convert $MAGICK_ARGS - | tesseract stdin stdout| grep -v '^$')
# Every occurrence of "BLAZ" followed by zero or more digits.
# NOTE(review): the variable name is misspelled ("referece"); it is presumably
# used under this name in parts of the script not shown here — confirm before
# renaming.
referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*')
# Word-bounded numbers that are either 17 digits starting with 9 or 13 digits
# starting with 7. Several matches yield a multi-line value.
to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b')
@ -15,7 +16,7 @@ amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]
# Run of uppercase letters that directly follows "Status" plus one whitespace
# character (PCRE lookbehind, so only the value itself is printed).
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
# Lines containing "Remarks", with the first "Remarks " on each line removed.
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
# Lines containing "From", with the first "From " on each line removed.
from=$(echo "$raw_text" | grep From | sed 's/From //')
# Recipient name: take each line matching $to_number together with the line
# before it (-B1), then drop the lines that contain the number itself.
# NOTE(review): likely the removed side of the diff (no stderr redirection).
to_name=$(echo "$raw_text"| grep -B1 $to_number | grep -v $to_number)
# Same extraction with grep's stderr discarded. $to_number is unquoted, so when
# it is empty grep is invoked without a pattern; the redirections hide the
# resulting error output and to_name ends up empty.
# NOTE(review): a multi-line $to_number would word-split into a pattern plus
# file operands — confirm whether that case can occur.
to_name=$(echo "$raw_text"| grep -B1 $to_number 2>/dev/null| grep -v $to_number 2>/dev/null)
json=$(cat <<EOF
{
@ -34,6 +35,4 @@ json=$(cat <<EOF
EOF
)
# Print the assembled JSON through jq. $json is expanded unquoted, so the shell
# word-splits it and echo rejoins the words with single spaces before jq
# reformats the result.
# NOTE(review): likely the removed side of the diff; the final line below
# prints without jq.
echo $json | jq
# Commented-out debugging aids: dump the raw OCR text / run tesseract directly.
#echo $raw_text
#tesseract $INPUT_IMAGE stdout --psm 6
# Print the JSON as-is; the unquoted expansion collapses runs of whitespace
# (including newlines) to single spaces and is subject to pathname expansion.
echo $json