Compare commits
12 Commits
2a527b509e
...
main
Author | SHA1 | Date | |
---|---|---|---|
32c43c13ce
|
|||
8b7830bddc
|
|||
cdf994fd54
|
|||
4b196c578f
|
|||
0f58bbc330
|
|||
e1e69a85f6
|
|||
35c802bf7a
|
|||
69f6877905
|
|||
f9ef381c90
|
|||
115313ce3a
|
|||
00dcaf8a2b
|
|||
bfd3378c2e
|
21
.build/Dockerfile
Normal file
21
.build/Dockerfile
Normal file
@ -0,0 +1,21 @@
|
||||
FROM python:3.9.16-slim-bullseye
|
||||
|
||||
# Set the build shell to bash; the default shell sometimes has issues
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install packages
|
||||
RUN apt update \
|
||||
&& apt install --no-install-recommends -y imagemagick jq curl tesseract-ocr nano
|
||||
|
||||
WORKDIR /var/www/html
|
||||
COPY . /var/www/html/
|
||||
RUN chmod 777 .
|
||||
|
||||
# Delete useless files
|
||||
RUN rm -rf \
|
||||
/var/lib/{apt,dpkg} \
|
||||
/var/{cache,log,spool} \
|
||||
/var/www/html/{.git,.build,README.md,env.example,docker-compose.yml,.gitignore}
|
||||
RUN mkdir -p uploads && chmod 777 -R uploads/
|
||||
|
||||
CMD python3 -m http.server --cgi 8000 --directory public
|
7
.build/docker-compose.yml
Normal file
7
.build/docker-compose.yml
Normal file
@ -0,0 +1,7 @@
|
||||
services:
|
||||
reciptocr:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: .build/Dockerfile
|
||||
hostname: reciptocr
|
||||
image: git.shihaam.dev/shihaam/recipt-ocr-api
|
7
compose.yml
Normal file
7
compose.yml
Normal file
@ -0,0 +1,7 @@
|
||||
services:
|
||||
#########################
|
||||
reciptocr:
|
||||
hostname: reciptocr
|
||||
image: git.shihaam.dev/shihaam/recipt-ocr-api
|
||||
ports:
|
||||
- 8000:8000
|
10
ocr.sh
10
ocr.sh
@ -1,8 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
INPUT_IMAGE=$1
|
||||
MAGICK_ARGS="$INPUT_IMAGE -resize 150% -type Grayscale -threshold 95% "
|
||||
|
||||
raw_text=$(tesseract $INPUT_IMAGE stdout --psm 6)
|
||||
raw_text=$(convert $MAGICK_ARGS - | tesseract stdin stdout| grep -v '^$')
|
||||
|
||||
referece=$(echo "$raw_text" | grep -o 'BLAZ[0-9]*')
|
||||
to_number=$(echo "$raw_text" | grep -oE '\b9[0-9]{16}\b|\b7[0-9]{12}\b')
|
||||
@ -13,6 +14,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
|
||||
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
||||
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
||||
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
||||
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
|
||||
from=$(echo "$raw_text" | grep From | sed 's/From //')
|
||||
to_name=$(echo "$raw_text"| grep -B1 $to_number 2>/dev/null| grep -v $to_number 2>/dev/null)
|
||||
|
||||
json=$(cat <<EOF
|
||||
{
|
||||
@ -20,7 +24,6 @@ json=$(cat <<EOF
|
||||
"Currency": "$currency",
|
||||
"Date": "$date $time",
|
||||
"From": "$from",
|
||||
"Message": "$message",
|
||||
"referece": "$referece",
|
||||
"Remarks": "$remarks",
|
||||
"Status": "$status",
|
||||
@ -32,5 +35,4 @@ json=$(cat <<EOF
|
||||
EOF
|
||||
)
|
||||
|
||||
echo $json | jq
|
||||
echo $raw_text
|
||||
echo $json
|
||||
|
12
ocr_raw.sh
12
ocr_raw.sh
@ -1,12 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
INPUT_IMAGE=$1
|
||||
|
||||
raw_text=$(tesseract $INPUT_IMAGE stdout)
|
||||
|
||||
echo $raw_text
|
||||
echo ""
|
||||
echo +++real raw+++
|
||||
echo ""
|
||||
tesseract $INPUT_IMAGE stdout
|
35
public/cgi-bin/api.cgi
Executable file
35
public/cgi-bin/api.cgi
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import cgi
|
||||
import os
|
||||
import subprocess
|
||||
UPLOAD_DIR = './uploads/'
|
||||
|
||||
def main():
    """Handle one CGI request: save the uploaded image, OCR it, print the result.

    Reads the multipart form field ``file``, stores it under ``UPLOAD_DIR``
    using only the base name of the client-supplied filename, hands the saved
    path to ``run_ocr_script`` and prints whatever that returns. The stored
    upload is deleted afterwards, including when writing or OCR raises.

    When no usable file is supplied, a JSON object with an ``"error"`` key is
    printed instead, so the body matches the declared content type.
    """
    import json  # local import: only needed for the error reply

    # The trailing "\n" plus print's own newline yields the blank line that
    # ends the CGI header section.
    print("Content-Type: application/json\n")

    form = cgi.FieldStorage()

    try:
        fileitem = form['file']
    except (KeyError, TypeError):
        # KeyError: the request has no "file" field.
        # TypeError: the request body was not a form, so it is not indexable.
        fileitem = None

    # basename() drops any directory components sent by the client; names
    # that collapse to nothing (or to "."/"..") would point at a directory.
    fn = os.path.basename(getattr(fileitem, 'filename', None) or '')
    if fn in ('', '.', '..'):
        print(json.dumps({"error": "No file was uploaded."}))
        return

    file_path = os.path.join(UPLOAD_DIR, fn)
    try:
        # The context manager guarantees the data is flushed and the handle
        # closed before the OCR script opens the file.
        with open(file_path, 'wb') as upload:
            upload.write(fileitem.file.read())
        print(run_ocr_script(file_path))
    finally:
        # Never leave uploads behind, even if the write or the OCR step failed.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
|
||||
|
||||
def run_ocr_script(file_path):
    """Run ``./ocr.sh`` on *file_path* and return the text to send back.

    Args:
        file_path: Path of the image to pass to the script as its only
            argument.

    Returns:
        The script's standard output on success. On failure, a JSON object
        string of the form ``{"error": "<message>"}``; the caller prints the
        result under an ``application/json`` content type, so error replies
        must be parseable JSON as well.
    """
    import json  # local import: only needed to encode error replies

    try:
        # check=True turns a non-zero exit status into CalledProcessError;
        # the argument list (no shell) keeps the filename from being
        # interpreted by a shell.
        completed_process = subprocess.run(
            ['./ocr.sh', file_path],
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        return json.dumps({"error": f"An error occurred: {e}"})
    except Exception as e:
        # Boundary handler: e.g. the script is missing or not executable.
        return json.dumps({"error": f"Unexpected error: {e}"})
    return completed_process.stdout
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
59
public/index.html
Normal file
59
public/index.html
Normal file
@ -0,0 +1,59 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Recipt OCR API</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f4f4f9;
|
||||
color: #333;
|
||||
margin: 40px;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
height: 90vh;
|
||||
}
|
||||
form {
|
||||
background-color: #fff;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 0 10px rgba(0,0,0,0.1);
|
||||
}
|
||||
h1 {
|
||||
color: #5d5d5d;
|
||||
text-align: center;
|
||||
}
|
||||
label {
|
||||
margin-bottom: 10px;
|
||||
display: block;
|
||||
font-size: 16px;
|
||||
}
|
||||
input[type="file"] {
|
||||
display: block;
|
||||
margin-top: 5px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
input[type="submit"] {
|
||||
background-color: #4CAF50;
|
||||
color: white;
|
||||
padding: 10px 20px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
}
|
||||
input[type="submit"]:hover {
|
||||
background-color: #45a049;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Upload Recipt</h1>
|
||||
<form action="/reciptocr/api.cgi" method="post" enctype="multipart/form-data">
|
||||
<label for="file">Select an image (PNG or JPEG):</label>
|
||||
<input type="file" name="file" id="file" accept=".png, .jpeg, .jpg">
|
||||
<input type="submit" value="Upload">
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -23,7 +23,9 @@ time=$(echo "$raw_text" | grep -oE '[0-2][0-9]:[0-5][0-9]')
|
||||
currency=$(echo "$raw_text" | grep -oP '(?<=Amount\s)[A-Z]{3}')
|
||||
amount=$(echo "$raw_text" | grep -oP '(?<=Amount\s[A-Z]{3}\s)[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})')
|
||||
status=$(echo "$raw_text" | grep -oP '(?<=Status\s)[A-Z]+')
|
||||
from=$(echo "$raw_text" | grep -oP '(?<=From\s)[A-Z\s]+(?=\s[A-Z][a-z])')
|
||||
remarks=$(echo "$raw_text" | grep Remarks | sed 's/Remarks //')
|
||||
from=$(echo "$raw_text" | grep From | sed 's/From //')
|
||||
to_name=$(echo "$raw_text"| grep -B1 $to_number | grep -v $to_number)
|
||||
|
||||
json=$(cat <<EOF
|
||||
{
|
||||
|
0
uploads/.gitkeep
Normal file
0
uploads/.gitkeep
Normal file
Reference in New Issue
Block a user