Compare commits
12 Commits
2a527b509e
...
main
Author | SHA1 | Date | |
---|---|---|---|
32c43c13ce
|
|||
8b7830bddc
|
|||
cdf994fd54
|
|||
4b196c578f
|
|||
0f58bbc330
|
|||
e1e69a85f6
|
|||
35c802bf7a
|
|||
69f6877905
|
|||
f9ef381c90
|
|||
115313ce3a
|
|||
00dcaf8a2b
|
|||
bfd3378c2e
|
21
.build/Dockerfile
Normal file
21
.build/Dockerfile
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
FROM python:3.9.16-slim-bullseye
|
||||||
|
|
||||||
|
# Set the build shell to bash; the default shell sometimes has issues
|
||||||
|
SHELL ["/bin/bash", "-c"]
|
||||||
|
|
||||||
|
# Install packages
|
||||||
|
# Use apt-get (the `apt` front end has no stable CLI for scripts) and remove
# the downloaded package index in the same layer, so it never ends up in the
# image. Packages are listed one per line, sorted, to keep diffs readable.
RUN apt-get update \
 && apt-get install --no-install-recommends -y \
      curl \
      imagemagick \
      jq \
      nano \
      tesseract-ocr \
 && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /var/www/html
|
||||||
|
COPY . /var/www/html/
|
||||||
|
RUN chmod 777 .
|
||||||
|
|
||||||
|
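# Delete files that are not needed at runtime (note: files removed in a later
# layer still take up space in the earlier layers of the image)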
|
||||||
|
RUN rm -rf \
|
||||||
|
/var/lib/{apt,dpkg} \
|
||||||
|
/var/{cache,log,spool} \
|
||||||
|
/var/www/html/{.git,.build,README.md,env.example,docker-compose.yml,.gitignore}
|
||||||
|
RUN mkdir -p uploads && chmod 777 -R uploads/
|
||||||
|
|
||||||
|
# Exec (JSON-array) form: python3 is started directly, with no wrapping shell
# and no shell parsing of the arguments. Serves ./public on port 8000 with CGI
# support enabled.
CMD ["python3", "-m", "http.server", "--cgi", "8000", "--directory", "public"]
|
7
.build/docker-compose.yml
Normal file
7
.build/docker-compose.yml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Build definition for the reciptocr service: the build context is the parent
# directory (..) and the Dockerfile is .build/Dockerfile; the resulting image
# is tagged git.shihaam.dev/shihaam/recipt-ocr-api.
services:
|
||||||
|
reciptocr:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: .build/Dockerfile
|
||||||
|
hostname: reciptocr
|
||||||
|
image: git.shihaam.dev/shihaam/recipt-ocr-api
|
7
compose.yml
Normal file
7
compose.yml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Runtime definition for the reciptocr service: runs the
# git.shihaam.dev/shihaam/recipt-ocr-api image and maps host port 8000 to
# container port 8000.
services:
|
||||||
|
#########################
|
||||||
|
reciptocr:
|
||||||
|
hostname: reciptocr
|
||||||
|
image: git.shihaam.dev/shihaam/recipt-ocr-api
|
||||||
|
ports:
|
||||||
|
- 8000:8000
|
10
ocr.sh
10
ocr.sh
@ -1,8 +1,9 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
INPUT_IMAGE=$1
|
INPUT_IMAGE=$1
|
||||||
|
MAGICK_ARGS="$INPUT_IMAGE -resize 150% -type Grayscale -threshold 95% "
|
||||||
|
|
||||||
raw_text=$(tesseract $INPUT_IMAGE stdout --psm 6)
|
raw_text=$(convert $MAGICK_ARGS - | tesseract stdin stdout| grep -v '^$')
|
||||||
|
|
||||||
referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*')
|
referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*')
|
||||||
to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b')
|
to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b')
|
||||||
@ -13,6 +14,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
|
|||||||
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
||||||
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
||||||
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
||||||
|
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
|
||||||
|
from=$(echo "$raw_text" | grep From | sed 's/From //')
|
||||||
|
to_name=$(echo "$raw_text"| grep -B1 $to_number 2>/dev/null| grep -v $to_number 2>/dev/null)
|
||||||
|
|
||||||
json=$(cat <<EOF
|
json=$(cat <<EOF
|
||||||
{
|
{
|
||||||
@ -20,7 +24,6 @@ json=$(cat <<EOF
|
|||||||
"Currency": "$currency",
|
"Currency": "$currency",
|
||||||
"Date": "$date $time",
|
"Date": "$date $time",
|
||||||
"From": "$from",
|
"From": "$from",
|
||||||
"Message": "$message",
|
|
||||||
"referece": "$referece",
|
"referece": "$referece",
|
||||||
"Remarks": "$remarks",
|
"Remarks": "$remarks",
|
||||||
"Status": "$status",
|
"Status": "$status",
|
||||||
@ -32,5 +35,4 @@ json=$(cat <<EOF
|
|||||||
EOF
|
EOF
|
||||||
)
|
)
|
||||||
|
|
||||||
echo $json | jq
|
echo $json
|
||||||
echo $raw_text
|
|
||||||
|
12
ocr_raw.sh
12
ocr_raw.sh
@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
|
|
||||||
INPUT_IMAGE=$1
|
|
||||||
|
|
||||||
raw_text=$(tesseract $INPUT_IMAGE stdout)
|
|
||||||
|
|
||||||
echo $raw_text
|
|
||||||
echo ""
|
|
||||||
echo +++real raw+++
|
|
||||||
echo ""
|
|
||||||
tesseract $INPUT_IMAGE stdout
|
|
35
public/cgi-bin/api.cgi
Executable file
35
public/cgi-bin/api.cgi
Executable file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import cgi
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
UPLOAD_DIR = './uploads/'
|
||||||
|
|
||||||
|
def main():
    """CGI entry point: save the uploaded file, run OCR on it, print the result.

    Reads the multipart form field named ``file``, writes it under
    ``UPLOAD_DIR`` using only the base name of the client-supplied filename,
    passes the saved path to ``run_ocr_script`` and prints whatever that
    returns. The saved upload is always removed afterwards. When no usable
    file is present in the request, prints "No file was uploaded." instead.
    """
    # The blank line after the header terminates the CGI response headers.
    print("Content-Type: application/json\n")

    form = cgi.FieldStorage()

    # Indexing FieldStorage raises KeyError when the field is absent and
    # TypeError when the request body is not a form at all; treat both as
    # "nothing uploaded" rather than crashing the script.
    try:
        fileitem = form['file']
    except (KeyError, TypeError):
        fileitem = None

    # Several fields named "file" come back as a list; use the first one.
    if isinstance(fileitem, list):
        fileitem = fileitem[0] if fileitem else None

    # basename() drops any directory components sent by the client so the
    # upload cannot be written outside UPLOAD_DIR.
    filename = getattr(fileitem, 'filename', None)
    fn = os.path.basename(filename) if filename else ''

    if fn:
        file_path = os.path.join(UPLOAD_DIR, fn)
        try:
            # Context manager guarantees the handle is flushed and closed
            # before the OCR script reads the file.
            with open(file_path, 'wb') as upload:
                upload.write(fileitem.file.read())
            result = run_ocr_script(file_path)
            print(result)
        finally:
            # Clean up even if writing or OCR failed part-way through.
            if os.path.exists(file_path):
                os.remove(file_path)
    else:
        print("No file was uploaded.")
|
||||||
|
|
||||||
|
def run_ocr_script(file_path):
    """Run ``./ocr.sh`` on ``file_path`` and return its captured stdout.

    Never raises: if the script exits with a non-zero status, or anything
    else goes wrong while launching it, a human-readable error string is
    returned instead.
    """
    command = ['./ocr.sh', file_path]
    try:
        outcome = subprocess.run(
            command,
            check=True,           # non-zero exit -> CalledProcessError
            text=True,            # decode output as str
            capture_output=True,  # collect stdout/stderr instead of inheriting
        )
    except subprocess.CalledProcessError as e:
        return f"An error occurred: {e}"
    except Exception as e:
        return f"Unexpected error: {e}"
    else:
        return outcome.stdout
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
59
public/index.html
Normal file
59
public/index.html
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Recipt OCR API</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
background-color: #f4f4f9;
|
||||||
|
color: #333;
|
||||||
|
margin: 40px;
|
||||||
|
display: flex;
|
||||||
|
justify-content: center;
|
||||||
|
align-items: center;
|
||||||
|
height: 90vh;
|
||||||
|
}
|
||||||
|
form {
|
||||||
|
background-color: #fff;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 0 10px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
h1 {
|
||||||
|
color: #5d5d5d;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
label {
|
||||||
|
margin-bottom: 10px;
|
||||||
|
display: block;
|
||||||
|
font-size: 16px;
|
||||||
|
}
|
||||||
|
input[type="file"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 5px;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
}
|
||||||
|
input[type="submit"] {
|
||||||
|
background-color: #4CAF50;
|
||||||
|
color: white;
|
||||||
|
padding: 10px 20px;
|
||||||
|
border: none;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 16px;
|
||||||
|
}
|
||||||
|
input[type="submit"]:hover {
|
||||||
|
background-color: #45a049;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Upload Recipt</h1>
|
||||||
|
<form action="/reciptocr/api.cgi" method="post" enctype="multipart/form-data">
|
||||||
|
<label for="file">Select an image (PNG or JPEG):</label>
|
||||||
|
<input type="file" name="file" id="file" accept=".png, .jpeg, .jpg">
|
||||||
|
<input type="submit" value="Upload">
|
||||||
|
</form>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -23,7 +23,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
|
|||||||
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
||||||
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
||||||
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
||||||
from=$(echo "$raw_text" | grep -oP '(?<=From\s)[A-Z\s]+(?=\s[A-Z][a-z])')
|
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
|
||||||
|
from=$(echo "$raw_text" | grep From | sed 's/From //')
|
||||||
|
to_name=$(echo "$raw_text"| grep -B1 $to_number | grep -v $to_number)
|
||||||
|
|
||||||
json=$(cat <<EOF
|
json=$(cat <<EOF
|
||||||
{
|
{
|
||||||
|
0
uploads/.gitkeep
Normal file
0
uploads/.gitkeep
Normal file
Reference in New Issue
Block a user