# Entity span refiners: an abstract EntityRefiner base class and an IBAN implementation.
import logging
import re
from abc import ABC, abstractmethod
from typing import Optional, Tuple

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class EntityRefiner(ABC):
    """Base class for refining (re-cropping) the span of a detected entity.

    Each refiner is bound to a single entity type, stored in ``entity_type``.
    Subclasses implement :meth:`refine`.
    """

    def __init__(self, entity_type: str):
        """Bind this refiner to ``entity_type``."""
        self.entity_type = entity_type

    @abstractmethod
    def refine(self, text: str, start: int, end: int) -> Optional[Tuple[int, int]]:
        """Re-crop a detected entity.

        Args:
            text: The text the entity was detected in.
            start: Start index of the detected span within ``text``.
            end: End index of the detected span within ``text``.

        Returns:
            An adjusted ``(start, end)`` pair, or ``None``.
            NOTE(review): ``None`` presumably means the entity should be
            dropped -- confirm against the callers.
        """

    def should_process(self, entity_type: str) -> bool:
        """Return True when ``entity_type`` equals this refiner's entity type."""
        return entity_type == self.entity_type


class IBANRefiner(EntityRefiner):
    """Refiner that tightens a detected span around the IBAN it contains."""

    def __init__(self):
        """Register for the ``"IBAN"`` entity type and compile the IBAN pattern."""
        super().__init__("IBAN")
        # Two letters, two digits, then 2-7 groups of four alphanumerics and a
        # final group of 1-4 alphanumerics; at most one whitespace character is
        # allowed after each complete group. Matching is case-insensitive.
        self.iban_regex = re.compile(
            r"\b[A-Z]{2}[0-9]{2}\s?(?:[A-Z0-9]{4}\s?){2,7}[A-Z0-9]{1,4}\b",
            re.IGNORECASE,
        )

    def refine(self, text: str, start: int, end: int) -> Optional[Tuple[int, int]]:
        """Shrink the span ``text[start:end]`` to the first IBAN found inside it.

        Args:
            text: The full text the entity was detected in.
            start: Start index of the detected span within ``text``.
            end: End index of the detected span within ``text``.

        Returns:
            ``(new_start, new_end)`` as indices into ``text`` covering exactly
            the matched IBAN, or ``None`` (after logging a warning) when the
            span contains nothing matching the IBAN pattern.
        """
        # Search the raw slice, not a stripped copy: match offsets must stay
        # relative to ``start``. Stripping first would shift the result left by
        # the number of leading whitespace characters in the span.
        candidate = text[start:end]
        match = self.iban_regex.search(candidate)

        if match is None:
            logger.warning("Invalid IBAN detected, skipping: '%s'", candidate.strip())
            return None

        new_start = start + match.start()
        new_end = start + match.end()

        logger.debug(
            "Adjusted IBAN span: %s-%s => %s-%s", start, end, new_start, new_end
        )
        return (new_start, new_end)