From 8979b754ddbac600f8e5744ffe96b9923505bc45 Mon Sep 17 00:00:00 2001 From: Shihaam Abdul Rahman Date: Sat, 3 Aug 2024 00:20:16 +0500 Subject: [PATCH] amount, currency, date, reference, account works --- ocr.sh | 30 +++++++++++++++++++++++++++++- ocr_raw.sh | 12 ++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100755 ocr_raw.sh diff --git a/ocr.sh b/ocr.sh index e206b0b..344b837 100755 --- a/ocr.sh +++ b/ocr.sh @@ -2,4 +2,32 @@ INPUT_IMAGE=$1 -tesseract $INPUT_IMAGE stdout + +raw_text=$(tesseract $INPUT_IMAGE stdout) + +referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*') +to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b') +date=$(echo "$raw_text" | grep -oE '\b[0-3][0-9]/[0-1][0-9]/[0-9]{4}\b') +time=$(echo "$raw_text" | grep -oE '\b[0-2][0-9]:[0-5][0-9]\b') +currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}') +amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})') + +json_output=$(cat <