Spaces:
Running
Running
| import logging | |
| import cv2 | |
| import os | |
| import numpy as np | |
| from PIL import Image, ImageEnhance | |
| from .base import BaseOCR | |
| from .gradio_ocr import GradioOCREngine | |
class OCREngine(BaseOCR):
    """OCR front end that tries a local engine first, then a Gradio engine.

    Attributes:
        engine_type: Name of the active local engine. ``'paddle'`` and
            ``'easyocr'`` are the values this class handles; if PaddleOCR
            fails to initialize, the value is switched to ``'easyocr'``.
        ocr: The local engine object, or ``None`` when no local engine
            could be initialized.
        gradio_fallback: A ``GradioOCREngine`` instance used when the local
            engine yields no text, or ``None`` if it could not be created.
    """

    def __init__(self, engine_type='paddle'):
        """Store the requested engine type and initialize the engines."""
        self.engine_type = engine_type
        self.ocr = None
        self.gradio_fallback = None
        self._initialize_engine()

    def _initialize_engine(self):
        """Create the Gradio fallback and the requested local engine.

        Every initialization failure is logged rather than raised, so the
        constructor always completes; ``self.ocr`` and/or
        ``self.gradio_fallback`` simply stay ``None``. An ``engine_type``
        other than ``'paddle'`` or ``'easyocr'`` creates no local engine.
        """
        logging.info(f"Initializing OCR engine: {self.engine_type}")

        # Create the fallback up front so it is available even when no
        # local engine can be initialized.
        try:
            self.gradio_fallback = GradioOCREngine()
        except Exception as e:
            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

        if self.engine_type == 'paddle':
            try:
                # Imported lazily so a missing package only disables this engine.
                from paddleocr import PaddleOCR
                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
                logging.info("PaddleOCR engine initialized successfully.")
            except Exception as e:
                logging.warning(
                    f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback."
                )
                self.engine_type = 'easyocr'

        # Deliberately a separate ``if`` (not ``elif``): a failed PaddleOCR
        # setup above falls through to EasyOCR here.
        if self.engine_type == 'easyocr':
            try:
                import easyocr
                self.ocr = easyocr.Reader(['en'])
                logging.info("EasyOCR engine initialized successfully.")
            except Exception as e:
                logging.error(
                    f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable."
                )
                self.ocr = None

    def preprocess_image(self, image_path, scale=2):
        """Load an image and upscale, denoise, sharpen and contrast-boost it.

        Args:
            image_path: Path of the image file to read.
            scale: Factor applied to both width and height. The scaled
                dimensions are rounded to whole pixels, so non-integer
                factors are accepted as well.

        Returns:
            The processed image as a BGR array, or ``None`` if the image
            could not be read or any processing step failed (the failure
            is logged).
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logging.error(f"Image not found or unreadable: {image_path}")
                return None

            # Upscale. The target size must be whole pixels.
            height, width = image.shape[:2]
            new_size = (int(round(width * scale)), int(round(height * scale)))
            image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)

            # Denoise
            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

            # Sharpen with a 3x3 kernel
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            image = cv2.filter2D(image, -1, kernel)

            # Enhance contrast with PIL, which expects RGB channel order
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

            logging.debug(f"Preprocessing completed for {image_path}")
            # Convert back to BGR before returning
            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
        except Exception as e:
            logging.error(f"Error during image preprocessing for {image_path}: {e}")
            return None

    def _run_local_ocr(self, image_path):
        """Return text from the local engine, or "" if it is unavailable or fails."""
        if not self.ocr or self.engine_type not in ('paddle', 'easyocr'):
            return ""

        try:
            processed_img = self.preprocess_image(image_path)
            if processed_img is None:
                return ""

            if self.engine_type == 'paddle':
                results = self.ocr.ocr(processed_img)
                if results and results[0]:
                    # The recognized text is read from line[1][0].
                    return " ".join(line[1][0] for line in results[0])
                return ""

            results = self.ocr.readtext(processed_img)
            # The recognized text is read from res[1].
            return " ".join(res[1] for res in results)
        except Exception as e:
            engine_label = 'PaddleOCR' if self.engine_type == 'paddle' else 'EasyOCR'
            logging.error(f"{engine_label} crashed: {e}")
            return ""

    def extract_text(self, image_path: str) -> str:
        """Extract text from an image using a tiered strategy.

        1. The local engine (PaddleOCR or EasyOCR) on the preprocessed image.
        2. The Gradio fallback on the original file, if step 1 produced
           no text and a fallback instance exists.

        Engine failures are logged, not raised.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The extracted text, or an empty string if every method failed.
        """
        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

        # 1. Local OCR
        extracted_text = self._run_local_ocr(image_path)
        source = self.engine_type

        # 2. Gradio fallback if local OCR produced nothing
        if not extracted_text and self.gradio_fallback:
            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
            source = 'Gradio'
            try:
                # ``or ""`` keeps the declared ``str`` return type even if
                # the fallback hands back None.
                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
            except Exception as e:
                logging.error(f"Gradio OCR fallback crashed: {e}")
                extracted_text = ""

        if extracted_text:
            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
        else:
            logging.error("All OCR methods failed to extract text.")
        return extracted_text

    def process(self, image_path: str) -> str:
        """Return the text extracted from ``image_path``; same as ``extract_text``."""
        return self.extract_text(image_path)