From f28e68441d287c5eb413f9299689b849ebe43521 Mon Sep 17 00:00:00 2001 From: Nacim Date: Tue, 24 Jun 2025 15:22:40 +0200 Subject: [PATCH] Update default.yaml --- conf/default.yaml | 99 ++++++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/conf/default.yaml b/conf/default.yaml index 32afc9b..034aef8 100644 --- a/conf/default.yaml +++ b/conf/default.yaml @@ -1,5 +1,5 @@ # ===================================================================== -# CONFIGURATION PRESIDIO - v31 (FINALE, ÉPURÉE ET FIABLE) +# CONFIGURATION PRESIDIO - v24 (COMPLÈTE, AVEC VOTRE REGEX TÉLÉPHONE) # ===================================================================== supported_languages: [en, fr] @@ -36,7 +36,7 @@ recognizer_registry: load_predefined_recognizers: true recognizers: - # --- DÉTECTEURS FIABLES ET PRÉCIS UNIQUEMENT --- + # --- DÉTECTEURS FIABLES ET PRÉCIS --- - name: CustomDateRecognizer supported_language: fr supported_entity: CUSTOM_DATE @@ -44,23 +44,59 @@ recognizer_registry: - name: Date JJ/MM/AAAA regex: "\\b(0[1-9]|[12][0-9]|3[01])[-/.](0[1-9]|1[012])[-/.](19|20)\\d{2}\\b" score: 1.0 - context: ["date de naissance", "né le", "date"] + context: ["date de naissance", "né le"] - - name: BelgianPhoneRecognizer + - name: EmailRecognizer supported_language: fr - supported_entity: PHONE_NUMBER + supported_entity: EMAIL_ADDRESS + patterns: [{name: Email Pattern, regex: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", score: 1.0}] + context: ["email", "courriel", "mail"] + + # --- VOTRE REGEX TÉLÉPHONE, ADAPTÉE POUR PRESIDIO --- + - name: CustomBelgianPhoneRecognizer + supported_language: fr + supported_entity: BE_PHONE_NUMBER patterns: - - name: Belgian Mobile Pattern - regex: "\\b(?:\\+|00)32[\\s.-]?4[6-9][0-9](?:[\\s.-]?\\d{2}){3}\\b" + - name: Belgian Phone Pattern (votre regex adaptée) + # J'ai doublé les backslashes et remplacé ^/$ par \b + regex: "\\b(((\\+|00)32[ ]?(?:\\(0\\)[ ]?)?)|0)?4\\d{2}(\\s?\\d{2}[\\s\\.]?){3}\\b" score: 1.0 context: ["téléphone", "tel", "gsm", "mobile"] + - name: CustomCreditCardRecognizer + supported_language: fr + supported_entity: CREDIT_CARD_NUMBER + patterns: + - name: Credit Card with spaces (Visa, Mastercard) + regex: "\\b(?:4[0-9]{3}(?:[ -]?[0-9]{4}){3}|5[1-5][0-9]{2}(?:[ -]?[0-9]{4}){3})\\b" + score: 0.95 + context: ["carte", "visa", "mastercard"] + - name: IbanRecognizer supported_language: fr supported_entity: IBAN patterns: [{name: IBAN Pattern, regex: "\\b[A-Z]{2}[0-9]{2}\\s?(?:[A-Z0-9]{4}\\s?){2,7}[A-Z0-9]{1,4}\\b", score: 0.95}] context: ["iban", "compte"] + + - name: SWIFTRecognizer + supported_language: fr + supported_entity: SWIFT_CODE + patterns: + - name: BIC/SWIFT Code + regex: "\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b" + score: 0.5 + context: ["bic", "swift"] + - name: SmartOrganizationRecognizer + supported_language: fr + supported_entity: ORGANIZATION + patterns: + - name: Company Name with Legal Form + regex: "\\b([A-Z][a-zà-ÿ]+(?:\\s[A-Z][a-zà-ÿ]+)*)\\s+(SPRL|SARL|SA|SCS|SNC)\\b" + score: 1.0 + context: ["société", "entreprise"] + + # --- NUMÉROS D'IDENTIFICATION --- - name: BelgianEnterpriseRecognizer supported_language: fr supported_entity: BE_ENTERPRISE_NUMBER @@ -73,40 +109,29 @@ recognizer_registry: patterns: [{name: NRN Pattern, regex: "\\b[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}-[0-9]{3}\\.[0-9]{2}\\b", score: 1.0}] context: ["registre national"] + - name: FrenchINSEERecognizer + supported_language: fr + supported_entity: FR_SOCIAL_SECURITY_NUMBER + patterns: + - name: INSEE Pattern with flexible spaces + regex: "\\b[12]\\s*[0-9]{2}\\s*(?:0[1-9]|1[0-2])\\s*(?:2[ABab]|[0-9]{2})\\s*[0-9]{3}\\s*[0-9]{3}[\\s]?[0-9]{2}\\b" + score: 0.95 + context: ["sécurité sociale", "insee", "nir"] + # 3. LISTE D'EXCLUSION (ALLOW LIST) # ===================================================================== allow_list: - # Liste de sécurité pour les mots en majuscules des contrats - - CONTRAT - - PRÊT - - CONVERTIBLE - - PROJET - - CONSTITUE - - CONTIENT - - CONSEILS - - PARTIES - - UTILISANT - - DOCUMENT - - DEVRAIENT - - TOUJOURS - - ENVISAGER - - SOIGNEUSEMENT - - DEMANDER - - CONSEILLERS - - QUALIFIÉS - - ÉVALUER - - IMPLICATIONS - - UTILISATION - - ATTENDU - - QUE - # Liste standard - Adresse - ADRESSE + - Contrat + - Document - Société - Investisseur - Montant + - Prêt - Intérêt - Partie + - Parties - Annexe - Remboursement - Conversion @@ -149,15 +174,17 @@ anonymizer_config: ORGANIZATION: replace DATE_TIME: replace MONEY: replace - CREDIT_CARD_NUMBER: replace - EMAIL_ADDRESS: replace # Entités détectées par nos règles personnalisées CUSTOM_DATE: replace - PHONE_NUMBER: replace + CREDIT_CARD_NUMBER: replace + EMAIL_ADDRESS: replace IBAN: replace BE_ENTERPRISE_NUMBER: replace BE_NATIONAL_REGISTER_NUMBER: replace + FR_SOCIAL_SECURITY_NUMBER: replace + SWIFT_CODE: replace + BE_PHONE_NUMBER: replace replacements: PERSON: "" @@ -168,7 +195,9 @@ anonymizer_config: CREDIT_CARD_NUMBER: "" MONEY: "" EMAIL_ADDRESS: "" - PHONE_NUMBER: "" IBAN: "" BE_ENTERPRISE_NUMBER: "" BE_NATIONAL_REGISTER_NUMBER: "" + FR_SOCIAL_SECURITY_NUMBER: "" + SWIFT_CODE: "" + BE_PHONE_NUMBER: ""