#!/bin/bash
#
# recipt-ocr-api/splitv2.sh
#
# Usage: splitv2.sh IMAGE_PATH
#
# Cuts the image at IMAGE_PATH into horizontal strips, runs tesseract on each
# strip and collects the recognized text, top to bottom, in OCR_RESULTS.

# Number of horizontal strips the image is cut into before OCR.
PART_COUNT=15

IMAGE_PATH=$1
IMAGE_HEIGHT=$(magick identify -format "%h" "$IMAGE_PATH")
IMAGE_WIDTH=$(magick identify -format "%w" "$IMAGE_PATH")

# Reset explicitly so a same-named variable inherited from the environment
# cannot end up in front of the recognized text.
OCR_RESULTS=""
PART_HEIGHT=0

if [[ "$IMAGE_HEIGHT" =~ ^[1-9][0-9]*$ && "$IMAGE_WIDTH" =~ ^[1-9][0-9]*$ ]]; then
    # An image with fewer pixel rows than strips would give a strip height of
    # 0; fall back to one strip per row in that case.
    if ((IMAGE_HEIGHT < PART_COUNT)); then
        PART_COUNT=$IMAGE_HEIGHT
    fi
    PART_HEIGHT=$((IMAGE_HEIGHT / PART_COUNT))

    # Loop to crop the image into strips and perform OCR on each of them
    for ((i = 0; i < PART_COUNT; i++)); do
        OFFSET=$((i * PART_HEIGHT))
        STRIP_HEIGHT=$PART_HEIGHT
        # The integer division above can leave a few rows over; the last strip
        # takes them so the bottom of the image is not silently dropped.
        if ((i == PART_COUNT - 1)); then
            STRIP_HEIGHT=$((IMAGE_HEIGHT - OFFSET))
        fi
        # tesseract reads the strip from stdin ("-") and writes text to stdout
        # ("-"). Its stderr is discarded on purpose to keep progress chatter
        # out of the script's output.
        OCR_PART=$(magick "$IMAGE_PATH" -crop "${IMAGE_WIDTH}x${STRIP_HEIGHT}+0+${OFFSET}" - | tesseract - - 2>/dev/null)
        OCR_RESULTS="${OCR_RESULTS} ${OCR_PART}"
    done
else
    # Keep going with empty OCR text (as before) but say why nothing was read.
    printf '%s: cannot read image dimensions of "%s"\n' "${0##*/}" "$IMAGE_PATH" >&2
fi

#######################################
# Extracts the receipt fields from OCR text with grep/sed patterns.
# Arguments: $1 - text to scan
# Globals:   writes referece, to_number, date, time, currency, amount,
#            status, remarks, from, to_name (empty when nothing matches)
#######################################
extract_receipt_fields() {
    local text=$1
    local first_account

    referece=$(grep -o 'BLAZ[0-9]*' <<<"$text")
    # Account numbers: 17 digits starting with 9, or 13 digits starting with 7.
    to_number=$(grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b' <<<"$text")
    # Word-boundary variants of the date/time patterns, currently disabled:
    #date=$(echo "$raw_text" | grep -oE '\b[0-3][0-9]/[0-1][0-9]/[0-9]{4}\b')
    #time=$(echo "$raw_text" | grep -oE '\b[0-2][0-9]:[0-5][0-9]\b')
    date=$(grep -oE '[0-3][0-9]/[0-1][0-9]/[0-9]{4}' <<<"$text")
    time=$(grep -oE '[0-2][0-9]:[0-5][0-9]' <<<"$text")
    currency=$(grep -oP '(?<=Amount\s)[A-Z]{3}' <<<"$text")
    amount=$(grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})' <<<"$text")
    status=$(grep -oP '(?<=Status\s)[A-Z]+' <<<"$text")
    remarks=$(grep Remarks <<<"$text" | sed 's/Remarks //')
    from=$(grep From <<<"$text" | sed 's/From //')

    # The recipient name is taken to be the line right above the account
    # number. Only look it up when an account number was found (grep without a
    # pattern is a usage error), and use the first number when several matched
    # so the extra ones are not mistaken for file names.
    to_name=""
    if [[ -n "$to_number" ]]; then
        first_account=${to_number%%$'\n'*}
        to_name=$(grep -F -m1 -B1 -- "$first_account" <<<"$text" | grep -vF -- "$first_account")
    fi
}

raw_text=$OCR_RESULTS
extract_receipt_fields "$raw_text"

#######################################
# Collapses every run of spaces, tabs and newlines in $1 into a single space.
# This mirrors what the earlier unquoted `echo $json` did to the values, so
# multi-line matches keep appearing on one line in the JSON output.
# Arguments: $1 - text to normalize
# Outputs:   normalized text on stdout (no trailing newline)
#######################################
squash_ws() {
    local text=$1
    text=${text//[$'\t\n']/ }
    while [[ "$text" == *"  "* ]]; do
        text=${text//  / }
    done
    printf '%s' "$text"
}

#######################################
# Prints the receipt as a JSON object.
# Values are handed to jq as --arg strings, so quotes, backslashes and other
# special characters coming out of OCR are escaped instead of breaking the
# document.
# Globals:   reads amount, currency, date, time, from, message, referece,
#            remarks, status, to_number, to_name
# Outputs:   pretty-printed JSON on stdout
#######################################
build_receipt_json() {
    # NOTE(review): "message" is not assigned anywhere in the visible script,
    # so "Message" is normally an empty string.
    jq -n \
        --arg amount "$(squash_ws "$amount")" \
        --arg currency "$(squash_ws "$currency")" \
        --arg datetime "$(squash_ws "$date $time")" \
        --arg sender "$(squash_ws "$from")" \
        --arg message "$(squash_ws "${message:-}")" \
        --arg reference "$(squash_ws "$referece")" \
        --arg remarks "$(squash_ws "$remarks")" \
        --arg status "$(squash_ws "$status")" \
        --arg account "$(squash_ws "$to_number")" \
        --arg recipient "$(squash_ws "$to_name")" \
        '{
            "Amount": $amount,
            "Currency": $currency,
            "Date": $datetime,
            "From": $sender,
            "Message": $message,
            "referece": $reference,
            "Remarks": $remarks,
            "Status": $status,
            "To": {
                "Account": $account,
                "Name": $recipient
            }
        }'
}

json=$(build_receipt_json)
printf '%s\n' "$json"

# Print the raw OCR text on a single line with whitespace collapsed. Word
# splitting is intentional here; globbing is switched off in the subshell so
# tokens such as "*" in the OCR text are not expanded to file names.
# shellcheck disable=SC2086
(
    set -f
    set -- $raw_text
    printf '%s\n' "$*"
)