init - huge mess
This commit is contained in:
commit
c35ff9d1dd
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
*.jpg
|
||||||
|
*.jpeg
|
||||||
|
*.png
|
||||||
|
*.save
|
||||||
|
venv/*
|
25
bml.py
Normal file
25
bml.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import sys
|
||||||
|
import pytesseract
|
||||||
|
from PIL import Image
|
||||||
|
import json
|
||||||
|
|
||||||
|
def ocr_image_to_json(image_path):
    """Run OCR on an image and print the raw result as JSON.

    The image at *image_path* is handed to ``pytesseract.image_to_data``
    with dictionary output, and that dictionary is printed to stdout as
    JSON indented by four spaces.

    Args:
        image_path: Path of the image file to read.

    Returns:
        None. The JSON goes to stdout. On any failure an ``Error: ...``
        line is written to stderr and nothing is raised.
    """
    try:
        # The context manager closes the underlying file once OCR is done.
        with Image.open(image_path) as image:
            ocr_result = pytesseract.image_to_data(
                image, output_type=pytesseract.Output.DICT
            )

        print(json.dumps(ocr_result, indent=4))
    except Exception as e:
        # Top-level boundary of a CLI helper: report and carry on. The
        # message goes to stderr so stdout only ever carries JSON.
        print(f"Error: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the image to OCR.
    if len(sys.argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        # The script name is taken from argv so the hint matches however the
        # file is actually named and invoked.
        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")

    ocr_image_to_json(sys.argv[1])
|
25
printall.py
Normal file
25
printall.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import sys
|
||||||
|
import pytesseract
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
def ocr_image_to_text(image_path):
    """Run OCR on an image and print the recognised words on one line.

    The image at *image_path* is handed to ``pytesseract.image_to_data``
    with dictionary output. Entries of its ``'text'`` list that are empty
    or whitespace-only are dropped, and the rest are printed to stdout
    joined by single spaces.

    Args:
        image_path: Path of the image file to read.

    Returns:
        None. The text goes to stdout. On any failure an ``Error: ...``
        line is written to stderr and nothing is raised.
    """
    try:
        # The context manager closes the underlying file once OCR is done.
        with Image.open(image_path) as image:
            ocr_result = pytesseract.image_to_data(
                image, output_type=pytesseract.Output.DICT
            )

        words = [word for word in ocr_result['text'] if word.strip()]
        print(" ".join(words))
    except Exception as e:
        # Top-level boundary of a CLI helper: report and carry on. The
        # message goes to stderr so stdout only ever carries OCR text.
        print(f"Error: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the image to OCR.
    if len(sys.argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        # The script name is taken from argv so the hint matches however the
        # file is actually named and invoked.
        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")

    ocr_image_to_text(sys.argv[1])
|
||||||
|
|
70
processed.py
Normal file
70
processed.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import sys
|
||||||
|
import pytesseract
|
||||||
|
from PIL import Image
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
def ocr_image_to_json(image_path):
    """OCR an image, extract transfer fields by regex and print them as JSON.

    The non-blank entries of the ``'text'`` list returned by
    ``pytesseract.image_to_data`` are joined with single spaces, and the
    following are searched for in that string (first match wins):

    * reference: ``BLAZ`` followed by 12 digits
    * transaction date: ``dd/dd/dddd``, an optional whitespace character,
      then ``dd:dd``
    * amount: ``MVR``, an optional whitespace character, then a number with
      exactly two decimals (only the number is kept)
    * destination account: an 18-digit run starting with ``9`` or a
      14-digit run starting with ``7``

    The first two matches of an upper-case token pattern are reported as the
    sender and recipient names.

    Args:
        image_path: Path of the image file to read.

    Returns:
        None. The result object is printed to stdout as JSON indented by
        four spaces, with missing fields as empty strings. Diagnostic
        lines and any ``Error: ...`` message go to stderr, so stdout
        carries only the JSON document. Nothing is raised.
    """
    try:
        # The context manager closes the underlying file once OCR is done.
        with Image.open(image_path) as image:
            ocr_result = pytesseract.image_to_data(
                image, output_type=pytesseract.Output.DICT
            )
        text = " ".join(word for word in ocr_result['text'] if word.strip())

        # Debug output is kept, but on stderr, so it cannot corrupt the JSON.
        print("Extracted Text:", text, file=sys.stderr)

        # Regex patterns
        reference_pattern = r"BLAZ\d{12}"
        date_pattern = r"\d{2}/\d{2}/\d{4}\s?\d{2}:\d{2}"
        amount_pattern = r"MVR\s?(\d+\.\d{2})"
        to_address_pattern = r"(9\d{17}|7\d{13})"

        reference_match = re.search(reference_pattern, text)
        date_match = re.search(date_pattern, text)
        amount_match = re.search(amount_pattern, text)
        to_address_match = re.search(to_address_pattern, text)

        # Resolve each match to its final string once. None of the patterns
        # can match an empty string, so "" reliably means "not found".
        reference = reference_match.group() if reference_match else ""
        transaction_date = date_match.group() if date_match else ""
        amount = amount_match.group(1) if amount_match else ""
        to_account = to_address_match.group() if to_address_match else ""

        print("Reference:", reference or "Not found", file=sys.stderr)
        print("Transaction Date:", transaction_date or "Not found", file=sys.stderr)
        print("Amount:", amount or "Not found", file=sys.stderr)
        print("To Address:", to_account or "Not found", file=sys.stderr)

        # Find 'from' and 'to' names.
        # NOTE(review): this pattern matches any run of capitals, so tokens
        # such as "MVR" or the "BLAZ" prefix can be picked up as names when
        # they come first in the text. Confirm against real receipts.
        names_pattern = re.compile(r"([A-Z]+\s?[A-Z.]+)")
        names = names_pattern.findall(text)
        print("Names Found:", names, file=sys.stderr)

        from_name, to_name = "", ""
        if len(names) > 1:
            from_name, to_name = names[0], names[1]

        # NOTE(review): status and message are constants; they are emitted
        # even when none of the fields above were found.
        result = {
            "status": "SUCCESS",
            "message": "Thank you. Transfer transaction is successful.",
            "reference": reference,
            "transaction_date": transaction_date,
            "from": from_name,
            "to_name": to_name,
            "to_account": to_account,
            "amount": amount,
        }

        print(json.dumps(result, indent=4))
    except Exception as e:
        # Top-level boundary of a CLI helper: report and carry on.
        print(f"Error: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the image to OCR.
    if len(sys.argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        # The script name is taken from argv so the hint matches however the
        # file is actually named and invoked.
        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")

    ocr_image_to_json(sys.argv[1])
|
Loading…
x
Reference in New Issue
Block a user