-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_extract.py
35 lines (30 loc) · 1.02 KB
/
ocr_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pytesseract
from dotenv import load_dotenv
import os
from PIL import Image
class OCRExtractor:
    """Small wrapper around pytesseract for extracting text from images.

    The Tesseract executable location is taken from the ``TESSERACT_PATH``
    environment variable, read after ``load_dotenv()`` has been called.
    Every method is best-effort: failures are reported with ``print`` and a
    fallback value is returned instead of an exception being raised.
    """

    def __init__(self) -> None:
        """Load environment settings and configure the Tesseract command."""
        load_dotenv()
        self.tesseract_path = os.getenv("TESSERACT_PATH")
        # Only override pytesseract's command when a path was actually
        # configured. Assigning None (or "") would clobber the library's
        # existing setting and break every later OCR call.
        if self.tesseract_path:
            pytesseract.pytesseract.tesseract_cmd = self.tesseract_path

    def read_image(self, image_path):
        """Open the image at *image_path* with PIL.

        Returns:
            The opened image object, or ``None`` if opening failed (the
            error is printed).
        """
        # Broad catch is deliberate: callers rely on None-on-failure.
        try:
            return Image.open(image_path)
        except Exception as e:
            print(f"Error reading image: {e}")
            return None

    def extract_text_from_image(self, image) -> str:
        """Run English-language OCR on *image* and return the text.

        Returns:
            The recognised text, or ``""`` if *image* is ``None`` or OCR
            failed (the error is printed).
        """
        # read_image() returns None on failure; handle that explicitly
        # rather than depending on how pytesseract rejects a None input.
        if image is None:
            print("Error extracting text: no image provided")
            return ""
        try:
            return pytesseract.image_to_string(image, lang="eng")
        except Exception as e:
            print(f"Error extracting text: {e}")
            return ""

    def save_text_to_file(self, text, output_path) -> None:
        """Write *text* to *output_path* as UTF-8, replacing any existing file.

        Prints a success message after writing; on failure the error is
        printed and nothing is raised.
        """
        try:
            with open(output_path, "w", encoding="utf-8") as file:
                file.write(text)
            print("Successfully extracted data")
        except Exception as e:
            print(f"Error saving text to file: {e}")