diff --git a/splitv2.sh b/splitv2.sh new file mode 100755 index 0000000..6ad09ed --- /dev/null +++ b/splitv2.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +IMAGE_PATH=$1 +IMAGE_HEIGHT=$(magick identify -format "%h" "$IMAGE_PATH") +IMAGE_WIDTH=$(magick identify -format "%w" "$IMAGE_PATH") +PART_HEIGHT=$((IMAGE_HEIGHT / 15)) + +# Loop to crop the image into 15 parts and perform OCR +for i in $(seq 0 14); do + OFFSET=$((i * PART_HEIGHT)) + OCR_PART=$(magick "$IMAGE_PATH" -crop "${IMAGE_WIDTH}x${PART_HEIGHT}+0+${OFFSET}" - | tesseract - - stdout 2>/dev/null) + OCR_RESULTS="${OCR_RESULTS} ${OCR_PART}" +done + +raw_text=$OCR_RESULTS + +referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*') +to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b') +#date=$(echo "$raw_text" | grep -oE '\b[0-3][0-9]/[0-1][0-9]/[0-9]{4}\b') +#time=$(echo "$raw_text" | grep -oE '\b[0-2][0-9]:[0-5][0-9]\b') +date=$(echo "$raw_text" | grep -oE '[0-3][0-9]/[0-1][0-9]/[0-9]{4}') +time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]') +currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}') +amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})') +status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+') +from=$(echo "$raw_text" | grep -oP '(?<=From\s)[A-Z\s]+(?=\s[A-Z][a-z])') + +json=$(cat <