init - huge mess

2024-08-02 22:51:03 +05:00
commit c35ff9d1dd
4 changed files with 125 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+*.jpg
+*.jpeg
+*.png
+*.save
+venv/*
--- a/bml.py
+++ b/bml.py
@ -0,0 +1,25 @@
+import sys
+import pytesseract
+from PIL import Image
+import json
+
+def ocr_image_to_json(image_path):
+    """Run Tesseract OCR on the image at *image_path* and print the raw result as JSON.
+
+    Prints ``Error: <message>`` instead of raising when any step fails.
+    """
+    try:
+        # Context manager so the underlying file handle is closed even if OCR fails.
+        with Image.open(image_path) as image:
+            ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+        print(json.dumps(ocr_result, indent=4))
+    except Exception as e:
+        # Top-level CLI boundary: report the failure rather than show a traceback.
+        print(f"Error: {e}")
+
+if __name__ == "__main__":
+    # CLI entry point: expects exactly one argument, the path of the image to OCR.
+    if len(sys.argv) != 2:
+        # argv[0] names the script actually run; sys.exit(str) prints to stderr, status 1.
+        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")
+    ocr_image_to_json(sys.argv[1])
--- a/printall.py
+++ b/printall.py
@ -0,0 +1,25 @@
+import sys
+import pytesseract
+from PIL import Image
+
+def ocr_image_to_text(image_path):
+    """Run Tesseract OCR on the image at *image_path* and print the recognized words.
+
+    Words are printed on one line, space-separated; blank OCR entries are skipped.
+    Prints ``Error: <message>`` instead of raising when any step fails.
+    """
+    try:
+        # Context manager so the underlying file handle is closed even if OCR fails.
+        with Image.open(image_path) as image:
+            ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+        print(" ".join(word for word in ocr_result['text'] if word.strip()))
+    except Exception as e:
+        print(f"Error: {e}")
+
+if __name__ == "__main__":
+    # CLI entry point: expects exactly one argument, the path of the image to OCR.
+    if len(sys.argv) != 2:
+        # argv[0] names the script actually run; sys.exit(str) prints to stderr, status 1.
+        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")
+    ocr_image_to_text(sys.argv[1])
+
--- a/processed.py
+++ b/processed.py
@ -0,0 +1,70 @@
+import sys
+import pytesseract
+from PIL import Image
+import re
+import json
+
+def ocr_image_to_json(image_path):
+    """OCR an image (apparently a transfer receipt) and print the fields found as JSON.
+
+    The recognized words are joined into one string and searched with regular
+    expressions for a reference, a date/time, an ``MVR`` amount and a destination
+    account number. Debug lines showing the raw text and each match are printed
+    before the JSON document. Fields that are not found are emitted as empty
+    strings; ``status`` and ``message`` are fixed values. Any failure is
+    reported as ``Error: <message>`` on stdout instead of being raised.
+    """
+    try:
+        # Context manager so the underlying file handle is closed even if OCR fails.
+        with Image.open(image_path) as image:
+            ocr_result = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+        text = " ".join(word for word in ocr_result['text'] if word.strip())
+
+        # Debug: Print the extracted text
+        print("Extracted Text:", text)
+
+        def first_match(pattern, group=0):
+            # Text of the first match of *pattern* in the OCR text, or "" if none.
+            found = re.search(pattern, text)
+            return found.group(group) if found else ""
+
+        reference = first_match(r"BLAZ\d{12}")
+        transaction_date = first_match(r"\d{2}/\d{2}/\d{4}\s?\d{2}:\d{2}")
+        amount = first_match(r"MVR\s?(\d+\.\d{2})", group=1)  # number only, without "MVR"
+        to_account = first_match(r"(9\d{17}|7\d{13})")
+
+        # Debug: Print the matched values
+        print("Reference:", reference or "Not found")
+        print("Transaction Date:", transaction_date or "Not found")
+        print("Amount:", amount or "Not found")
+        print("To Address:", to_account or "Not found")
+
+        # Heuristic for the 'from' and 'to' names: take the first two runs of capitals.
+        # NOTE(review): tokens such as "MVR" or "BLAZ" also match this pattern, so the
+        # result depends on where the names sit in the OCR text.
+        names = re.findall(r"([A-Z]+\s?[A-Z.]+)", text)
+
+        # Debug: Print the names found
+        print("Names Found:", names)
+        from_name, to_name = names[:2] if len(names) > 1 else ("", "")
+
+        result = {
+            "status": "SUCCESS",
+            "message": "Thank you. Transfer transaction is successful.",
+            "reference": reference,
+            "transaction_date": transaction_date,
+            "from": from_name,
+            "to_name": to_name,
+            "to_account": to_account,
+            "amount": amount,
+        }
+        print(json.dumps(result, indent=4))
+    except Exception as e:
+        print(f"Error: {e}")
+
+if __name__ == "__main__":
+    # CLI entry point: expects exactly one argument, the path of the image to OCR.
+    if len(sys.argv) != 2:
+        # argv[0] names the script actually run; sys.exit(str) prints to stderr, status 1.
+        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")
+    ocr_image_to_json(sys.argv[1])