diff --git a/bml.py b/bml.py
deleted file mode 100644
index a410a95..0000000
--- a/bml.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-import pytesseract
-from PIL import Image
-import json
-
-def ocr_image_to_json(image_path):
-    try:
-        # Open the image file
-        image = Image.open(image_path)
-
-        # Perform OCR on the image
-        ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
-
-        # Convert the OCR result to JSON
-        ocr_json = json.dumps(ocr_result, indent=4)
-        print(ocr_json)
-    except Exception as e:
-        print(f"Error: {e}")
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python ocr_to_json.py ")
-    else:
-        image_path = sys.argv[1]
-        ocr_image_to_json(image_path)
diff --git a/ocr.sh b/ocr.sh
new file mode 100755
index 0000000..e206b0b
--- /dev/null
+++ b/ocr.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Run Tesseract OCR on a single image and print the recognized text to stdout.
+#
+# Usage: ocr.sh <image>
+
+if [ "$#" -lt 1 ]; then
+    echo "Usage: $0 <image>" >&2
+    exit 1
+fi
+
+INPUT_IMAGE=$1
+
+# Quote the path so names containing spaces or glob characters reach
+# tesseract as one argument.
+tesseract "$INPUT_IMAGE" stdout
diff --git a/printall.py b/printall.py
deleted file mode 100644
index 241b020..0000000
--- a/printall.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-import pytesseract
-from PIL import Image
-
-def ocr_image_to_text(image_path):
-    try:
-        # Open the image file
-        image = Image.open(image_path)
-
-        # Perform OCR on the image
-        ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
-
-        # Extract and print the text portion
-        text = [word for word in ocr_result['text'] if word.strip() != ""]
-        print(" ".join(text))
-    except Exception as e:
-        print(f"Error: {e}")
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python ocr_to_text.py ")
-    else:
-        image_path = sys.argv[1]
-        ocr_image_to_text(image_path)
-
diff --git a/processed.py b/processed.py
deleted file mode 100644
index 0480964..0000000
--- a/processed.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import sys
-import pytesseract
-from PIL import Image
-import re
-import json
-
-def ocr_image_to_json(image_path):
-    try:
-        # Open the image file
-        image = Image.open(image_path)
-
-        # Perform OCR on the image
-        ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
-        text = " ".join([word for word in ocr_result['text'] if word.strip() != ""])
-
-        # Debug: Print the extracted text
-        print("Extracted Text:", text)
-
-        # Regex patterns
-        reference_pattern = r"BLAZ\d{12}"
-        date_pattern = r"\d{2}/\d{2}/\d{4}\s?\d{2}:\d{2}"
-        amount_pattern = r"MVR\s?(\d+\.\d{2})"
-        to_address_pattern = r"(9\d{17}|7\d{13})"
-
-        # Extracting data using regex
-        reference = re.search(reference_pattern, text)
-        transaction_date = re.search(date_pattern, text)
-        amount = re.search(amount_pattern, text)
-        to_address = re.search(to_address_pattern, text)
-
-        # Debug: Print the matched regex groups
-        print("Reference:", reference.group() if reference else "Not found")
-        print("Transaction Date:", transaction_date.group() if transaction_date else "Not found")
-        print("Amount:", amount.group(1) if amount else "Not found")
-        print("To Address:", to_address.group() if to_address else "Not found")
-
-        # Find 'from' and 'to' names
-        names_pattern = re.compile(r"([A-Z]+\s?[A-Z.]+)")
-        names = names_pattern.findall(text)
-
-        # Debug: Print the names found
-        print("Names Found:", names)
-
-        from_name, to_name = "", ""
-        if len(names) > 1:
-            from_name, to_name = names[0], names[1]
-
-        # Prepare the result in JSON format
-        result = {
-            "status": "SUCCESS",
-            "message": "Thank you. Transfer transaction is successful.",
-            "reference": reference.group() if reference else "",
-            "transaction_date": transaction_date.group() if transaction_date else "",
-            "from": from_name,
-            "to_name": to_name,
-            "to_account": to_address.group() if to_address else "",
-            "amount": amount.group(1) if amount else ""
-        }
-
-        # Print the result as JSON
-        print(json.dumps(result, indent=4))
-    except Exception as e:
-        print(f"Error: {e}")
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python ocr_to_json.py ")
-    else:
-        image_path = sys.argv[1]
-        ocr_image_to_json(image_path)