Compare commits

...

12 Commits

9 changed files with 138 additions and 17 deletions

21
.build/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
FROM python:3.9.16-slim-bullseye

# Use bash for RUN steps: the brace expansion used by the cleanup commands
# below is a bash feature that the default /bin/sh does not provide.
SHELL ["/bin/bash", "-c"]

# Install the runtime packages and remove package-manager state, caches and
# logs in the SAME layer, so the deleted files never take up space in the
# image. apt-get is used because apt's command-line interface is not
# guaranteed to be stable for scripted use.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        curl \
        imagemagick \
        jq \
        nano \
        tesseract-ocr \
    && rm -rf \
        /var/lib/{apt,dpkg} \
        /var/{cache,log,spool}

WORKDIR /var/www/html
COPY . /var/www/html/

# Drop repository/build metadata from the application tree and prepare the
# upload directory.
# NOTE(review): the removed files still exist in the COPY layer above; listing
# them in a .dockerignore would keep them out of the image entirely.
# NOTE(review): the world-writable modes are presumably needed because
# http.server runs CGI scripts as user "nobody" when started as root; confirm
# and tighten (e.g. a dedicated USER owning uploads/) if possible.
RUN rm -rf /var/www/html/{.git,.build,README.md,env.example,docker-compose.yml,.gitignore} \
    && mkdir -p uploads \
    && chmod 777 . \
    && chmod -R 777 uploads/

# Documentation only: the HTTP server below listens on this port.
EXPOSE 8000

# Exec form: no wrapping shell, so the server process receives signals directly.
CMD ["python3", "-m", "http.server", "--cgi", "8000", "--directory", "public"]

View File

@ -0,0 +1,7 @@
# Compose definition used to build the OCR API image.
services:
reciptocr:
build:
# The build context is the parent directory of this compose file; the
# Dockerfile path below is resolved relative to that context.
context: ..
dockerfile: .build/Dockerfile
hostname: reciptocr
# Name given to the built image.
image: git.shihaam.dev/shihaam/recipt-ocr-api

7
compose.yml Normal file
View File

@ -0,0 +1,7 @@
# Compose definition that runs the OCR API from its image (no build section here).
services:
#########################
reciptocr:
hostname: reciptocr
image: git.shihaam.dev/shihaam/recipt-ocr-api
ports:
# Publish container port 8000 on host port 8000.
- 8000:8000

10
ocr.sh
View File

@ -1,8 +1,9 @@
#!/bin/bash
INPUT_IMAGE=$1
MAGICK_ARGS="$INPUT_IMAGE -resize 150% -type Grayscale -threshold 95% "
raw_text=$(tesseract $INPUT_IMAGE stdout --psm 6)
raw_text=$(convert $MAGICK_ARGS - | tesseract stdin stdout| grep -v '^$')
referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*')
to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b')
@ -13,6 +14,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
from=$(echo "$raw_text" | grep From | sed 's/From //')
to_name=$(echo "$raw_text"| grep -B1 $to_number 2>/dev/null| grep -v $to_number 2>/dev/null)
json=$(cat <<EOF
{
@ -20,7 +24,6 @@ json=$(cat <<EOF
"Currency": "$currency",
"Date": "$date $time",
"From": "$from",
"Message": "$message",
"referece": "$referece",
"Remarks": "$remarks",
"Status": "$status",
@ -32,5 +35,4 @@ json=$(cat <<EOF
EOF
)
echo $json | jq
echo $raw_text
echo $json

View File

@ -1,12 +0,0 @@
#!/bin/bash
# Debug helper: prints tesseract's OCR output for an image twice, first
# collapsed by the shell and then exactly as tesseract emits it.
# Usage: <script> IMAGE_PATH
INPUT_IMAGE=$1
# Quote the path so file names containing spaces or glob characters reach
# tesseract as a single argument.
raw_text=$(tesseract "$INPUT_IMAGE" stdout)
# Left unquoted: word splitting collapses the text onto one line, which looks
# deliberate given the "real raw" section that follows.
echo $raw_text
echo ""
echo +++real raw+++
echo ""
tesseract "$INPUT_IMAGE" stdout

35
public/cgi-bin/api.cgi Executable file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env python3
import cgi
import os
import subprocess
# Directory where uploaded files are staged before OCR; the path is relative,
# so it is resolved against the process's current working directory.
UPLOAD_DIR = './uploads/'
def main():
    """Handle one CGI upload request: stage the file, OCR it, print the result.

    Emits the ``Content-Type`` header, reads the multipart field named
    ``file``, writes it to a uniquely named file under ``UPLOAD_DIR``, prints
    whatever ``run_ocr_script`` returns for that file, and always removes the
    staged file afterwards. Prints "No file was uploaded." when the field is
    missing or carries no file name.
    """
    # Function-scope import so the file's top-level import block is untouched.
    import tempfile

    print("Content-Type: application/json\n")
    form = cgi.FieldStorage()

    # Indexing FieldStorage with an absent key raises KeyError, which would
    # surface as a traceback after the header has already been sent.
    fileitem = form['file'] if 'file' in form else None
    # Repeated "file" fields come back as a list; only the first is processed.
    if isinstance(fileitem, list):
        fileitem = fileitem[0]
    if fileitem is None or not fileitem.filename:
        print("No file was uploaded.")
        return

    # Keep only a plain alphanumeric extension from the client-supplied name.
    # The on-disk name itself is generated, so concurrent uploads with the same
    # name cannot overwrite or delete each other's file and odd names such as
    # ".." can never be used as a path.
    ext = os.path.splitext(os.path.basename(fileitem.filename))[1]
    suffix = ext if ext[1:].isascii() and ext[1:].isalnum() else ''
    fd, file_path = tempfile.mkstemp(suffix=suffix, dir=UPLOAD_DIR)
    try:
        # The context manager closes the handle before the OCR script reads it.
        with os.fdopen(fd, 'wb') as upload:
            upload.write(fileitem.file.read())
        print(run_ocr_script(file_path))
    finally:
        # Remove the staged upload even if writing or OCR fails.
        os.remove(file_path)
def run_ocr_script(file_path):
    """Run ``./ocr.sh`` on ``file_path`` and return its standard output.

    Args:
        file_path: Path of the image, passed to the script as its only argument.

    Returns:
        The script's stdout as text on success. If the script exits non-zero,
        a string starting with "An error occurred: "; for any other
        ``Exception`` (for example the script being absent), a string starting
        with "Unexpected error: ".
    """
    # Function-scope import so the file's top-level import block is untouched.
    import sys

    try:
        completed_process = subprocess.run(
            ['./ocr.sh', file_path],
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        # stderr was captured above; forward it to this process's stderr so the
        # reason for the failure is not silently discarded.
        if e.stderr:
            sys.stderr.write(e.stderr)
        return f"An error occurred: {e}"
    except Exception as e:
        return f"Unexpected error: {e}"
    return completed_process.stdout
# Script entry point: handle the request only when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    main()

59
public/index.html Normal file
View File

@ -0,0 +1,59 @@
<!DOCTYPE html>
<!-- Upload page: posts a single image to the OCR API as multipart/form-data. -->
<html lang="en">
<head>
    <!-- Declare the encoding and viewport explicitly so browsers neither guess
         the charset nor render the page zoomed out on mobile devices. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Recipt OCR API</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f9;
            color: #333;
            margin: 40px;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 90vh;
        }
        form {
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }
        h1 {
            color: #5d5d5d;
            text-align: center;
        }
        label {
            margin-bottom: 10px;
            display: block;
            font-size: 16px;
        }
        input[type="file"] {
            display: block;
            margin-top: 5px;
            margin-bottom: 20px;
        }
        input[type="submit"] {
            background-color: #4CAF50;
            color: white;
            padding: 10px 20px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
        }
        input[type="submit"]:hover {
            background-color: #45a049;
        }
    </style>
</head>
<body>
    <h1>Upload Recipt</h1>
    <!-- NOTE(review): the script is stored at cgi-bin/api.cgi in this tree; the
         action path below presumably relies on a reverse-proxy rewrite. Confirm. -->
    <form action="/reciptocr/api.cgi" method="post" enctype="multipart/form-data">
        <label for="file">Select an image (PNG or JPEG):</label>
        <!-- "required" stops the browser from submitting the form without a file. -->
        <input type="file" name="file" id="file" accept=".png, .jpeg, .jpg" required>
        <input type="submit" value="Upload">
    </form>
</body>
</html>

View File

@ -23,7 +23,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
from=$(echo "$raw_text" | grep -oP '(?<=From\s)[A-Z\s]+(?=\s[A-Z][a-z])')
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
from=$(echo "$raw_text" | grep From | sed 's/From //')
to_name=$(echo "$raw_text"| grep -B1 $to_number | grep -v $to_number)
json=$(cat <<EOF
{

0
uploads/.gitkeep Normal file
View File