From 601e94d174fccdd7d0547b977215b98638deaeeb Mon Sep 17 00:00:00 2001 From: Nacim Date: Mon, 16 Jun 2025 03:44:09 +0200 Subject: [PATCH] Update custom_recognizers.py --- presidio_config/custom_recognizers.py | 40 +++++++++++---------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/presidio_config/custom_recognizers.py b/presidio_config/custom_recognizers.py index d3fd999..ca47066 100644 --- a/presidio_config/custom_recognizers.py +++ b/presidio_config/custom_recognizers.py @@ -10,19 +10,16 @@ class BelgianNrnRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Format standard : 12.34.56-789.01 Pattern( name="NRN format standard", regex=r"\b\d{2}\.\d{2}\.\d{2}-\d{3}\.\d{2}\b", score=1.0 ), - # Format compact : 12345678901 Pattern( name="NRN format compact", regex=r"\b\d{11}\b", score=0.7 ), - # Format avec espaces : 12 34 56 789 01 Pattern( name="NRN format espacé", regex=r"\b\d{2}\s\d{2}\s\d{2}\s\d{3}\s\d{2}\b", @@ -42,13 +39,11 @@ class BelgianEnterpriseRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Format standard : BE0123.456.789 Pattern( name="BTW/TVA format standard", regex=r"\bBE\s?0\d{3}\.\d{3}\.\d{3}\b", score=0.95 ), - # Format sans points : BE0123456789 Pattern( name="BTW/TVA format compact", regex=r"\bBE\s?0\d{9}\b", @@ -68,7 +63,6 @@ class BelgianBankAccountRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Format belge : 123-4567890-12 Pattern( name="Compte bancaire belge", regex=r"\b\d{3}-\d{7}-\d{2}\b", @@ -88,13 +82,11 @@ class ImprovedIbanRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # IBAN avec espaces Pattern( name="IBAN avec espaces", regex=r"\b[A-Z]{2}\d{2}(?:\s\d{4}){3,7}(?:\s\d{1,4})?\b", score=0.95 ), - # IBAN sans espaces Pattern( name="IBAN compact", regex=r"\b[A-Z]{2}\d{2}[A-Z0-9]{4,32}\b", @@ -114,19 +106,16 @@ class ImprovedPhoneRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Format international : +32 1 23 45 67 89 Pattern( name="Téléphone international", regex=r"\b(?:\+|00)(?:32|33|352)\s?[1-9](?:[\s.-]?\d{2}){3,4}\b", score=0.9 ), - # Format national : 01 23 45 67 89 Pattern( name="Téléphone national", regex=r"\b0[1-9](?:[\s.-]?\d{2}){4}\b", score=0.8 ), - # Format mobile belge : 04xx xx xx xx Pattern( name="Mobile belge", regex=r"\b04\d{2}[\s.-]?\d{2}[\s.-]?\d{2}[\s.-]?\d{2}\b", @@ -146,13 +135,11 @@ class FrenchNIRRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Format avec espaces : 1 23 04 75 123 456 78 Pattern( name="NIR avec espaces", regex=r"\b[12]\s?\d{2}\s?(?:0[1-9]|1[0-2])\s?(?:2[ABab]|[0-9]{2})\s?\d{3}\s?\d{3}\s?\d{2}\b", score=1.0 ), - # Format compact : 12304751234567 Pattern( name="NIR compact", regex=r"\b[12]\d{2}(?:0[1-9]|1[0-2])(?:2[ABab]|[0-9]{2})\d{6}\d{2}\b", @@ -172,7 +159,6 @@ class ImprovedEmailRecognizer(PatternRecognizer): def __init__(self): patterns = [ - # Email standard avec domaines courants Pattern( name="Email standard", regex=r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b", @@ -187,13 +173,19 @@ class ImprovedEmailRecognizer(PatternRecognizer): ) -# Liste des reconnaisseurs à charger -custom_recognizers = [ - BelgianNrnRecognizer(), - BelgianEnterpriseRecognizer(), - BelgianBankAccountRecognizer(), - ImprovedIbanRecognizer(), - ImprovedPhoneRecognizer(), - FrenchNIRRecognizer(), - ImprovedEmailRecognizer() -] +# Fonction pour créer les instances des reconnaisseurs +def get_custom_recognizers(): + """Retourne la liste des reconnaisseurs personnalisés instanciés.""" + return [ + BelgianNrnRecognizer(), + BelgianEnterpriseRecognizer(), + BelgianBankAccountRecognizer(), + ImprovedIbanRecognizer(), + ImprovedPhoneRecognizer(), + FrenchNIRRecognizer(), + ImprovedEmailRecognizer() + ] + + +# Variable pour la compatibilité avec la configuration YAML +custom_recognizers = get_custom_recognizers()