presidio modulaire

This commit is contained in:
nBiqoz
2025-09-07 12:29:08 +02:00
parent 85d95d05e5
commit c62e5b92d5
42 changed files with 1802 additions and 324 deletions

49
refiners/iban_refiner.py Normal file
View File

@@ -0,0 +1,49 @@
from abc import ABC, abstractmethod
from typing import Optional, Tuple
import re
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class EntityRefiner(ABC):
    """Base class for components that re-frame the span of a detected entity.

    Each refiner is bound to a single entity type, stored in ``entity_type``.
    """

    def __init__(self, entity_type: str):
        # Entity label this refiner is responsible for.
        self.entity_type = entity_type

    def should_process(self, entity_type: str) -> bool:
        """Tell whether this refiner handles the given entity type.

        Args:
            entity_type: Label of the entity under consideration.

        Returns:
            True when the label equals the one this refiner was built with.
        """
        is_handled = entity_type == self.entity_type
        return is_handled

    @abstractmethod
    def refine(self, text: str, start: int, end: int) -> Optional[Tuple[int, int]]:
        """Re-frame a detected entity.

        Subclasses must override this method; the base body does nothing
        and returns None.

        Args:
            text: Text in which the entity was detected.
            start: Start offset of the detected entity.
            end: End offset of the detected entity.

        Returns:
            A ``(start, end)`` tuple for the adjusted span, or None.
        """
class IBANRefiner(EntityRefiner):
    """Refiner that tightens a detected IBAN span to the IBAN itself."""

    def __init__(self):
        super().__init__("IBAN")
        # Two letters, two digits, then exactly three whitespace-separated
        # groups of four digits; letters are matched case-insensitively.
        # NOTE(review): this fits 16-character IBANs only; a longer IBAN
        # written in 4-digit groups would be cut to its first four groups.
        # Confirm that this is the intended scope.
        self.iban_regex = re.compile(r"\b[A-Z]{2}[0-9]{2}(?:\s[0-9]{4}){3}\b", re.IGNORECASE)

    def refine(self, text: str, start: int, end: int) -> Optional[Tuple[int, int]]:
        """Shrink the span ``text[start:end]`` to the IBAN it contains.

        Args:
            text: Full text in which the entity was detected.
            start: Start offset of the detected entity in ``text``.
            end: End offset (exclusive) of the detected entity in ``text``.

        Returns:
            ``(new_start, new_end)`` offsets into ``text`` covering exactly
            the matched IBAN, or None when the span contains no match.
        """
        # Search the raw slice (not a stripped copy) so that match offsets
        # stay relative to ``start`` even when the span has leading
        # whitespace.
        candidate = text[start:end]
        match = self.iban_regex.search(candidate)
        if match is None:
            logger.warning("Invalid IBAN detected, skipping: '%s'", candidate.strip())
            return None

        # Use the match's own position rather than re-locating its text with
        # str.find, which could hit an earlier, non-word-bounded occurrence.
        new_start = start + match.start()
        new_end = start + match.end()
        logger.debug("Adjusted IBAN span: %s-%s => %s-%s", start, end, new_start, new_end)
        return (new_start, new_end)