Actualiser conf/default.yaml

This commit is contained in:
2025-07-28 16:29:48 +00:00
parent f92a71bed1
commit add863150a

View File

@@ -1,9 +1,9 @@
# ===================================================================== # =====================================================================
# CONFIGURATION PRESIDIO - v24 # CONFIGURATION PRESIDIO - v25
# ===================================================================== # =====================================================================
supported_languages: [en, fr] supported_languages: [en, fr]
# 1. CONFIGURATION DU MOTEUR NLP # 1. CONFIGURATION DU MOTEUR NLP (INCHANGÉ)
# ===================================================================== # =====================================================================
nlp_configuration: nlp_configuration:
nlp_engine_name: spacy nlp_engine_name: spacy
@@ -30,12 +30,18 @@ nlp_configuration:
LOCATION: 0.4 LOCATION: 0.4
ORGANIZATION: 0.55 ORGANIZATION: 0.55
# 2. CONFIGURATION DU REGISTRE DES DÉTECTEURS # 2. CONFIGURATION DU REGISTRE DES DÉTECTEURS (AVEC L'AJOUT POUR LES WARNINGS)
# ===================================================================== # =====================================================================
recognizer_registry: recognizer_registry:
load_predefined_recognizers: true load_predefined_recognizers: true
recognizers: recognizers:
# --- On déclare le détecteur NLP de base ---
- name: SpacyRecognizer
supported_language: fr
- name: SpacyRecognizer
supported_language: en
- name: FlexibleDateRecognizer - name: FlexibleDateRecognizer
supported_language: fr supported_language: fr
@@ -49,7 +55,6 @@ recognizer_registry:
score: 1.0 score: 1.0
context: ["date", "né le", "signé le", "incident du"] context: ["date", "né le", "signé le", "incident du"]
- name: BelgianAddressRecognizer - name: BelgianAddressRecognizer
supported_language: fr supported_language: fr
supported_entity: BE_ADDRESS supported_entity: BE_ADDRESS
@@ -59,7 +64,6 @@ recognizer_registry:
score: 1.0 score: 1.0
context: ["demeurant", "adresse", "siège social", "bureaux situés"] context: ["demeurant", "adresse", "siège social", "bureaux situés"]
- name: BelgianPhoneRecognizer - name: BelgianPhoneRecognizer
supported_language: fr supported_language: fr
supported_entity: BE_PHONE_NUMBER supported_entity: BE_PHONE_NUMBER
@@ -69,7 +73,6 @@ recognizer_registry:
score: 0.95 score: 0.95
context: ["Tel", "Tél", "téléphone", "gsm", "mobile"] context: ["Tel", "Tél", "téléphone", "gsm", "mobile"]
- name: SmartOrganizationRecognizer - name: SmartOrganizationRecognizer
supported_language: fr supported_language: fr
supported_entity: ORGANIZATION supported_entity: ORGANIZATION
@@ -82,7 +85,6 @@ recognizer_registry:
score: 0.9 score: 0.9
context: ["société", "entreprise", "gérant de la"] context: ["société", "entreprise", "gérant de la"]
- name: ProfessionalIdRecognizer - name: ProfessionalIdRecognizer
supported_language: fr supported_language: fr
supported_entity: BE_PRO_ID supported_entity: BE_PRO_ID
@@ -92,7 +94,6 @@ recognizer_registry:
score: 1.0 score: 1.0
context: ["expert-comptable"] context: ["expert-comptable"]
- name: BelgianEnterpriseRecognizer - name: BelgianEnterpriseRecognizer
supported_language: fr supported_language: fr
supported_entity: BE_ENTERPRISE_NUMBER supported_entity: BE_ENTERPRISE_NUMBER
@@ -102,7 +103,6 @@ recognizer_registry:
score: 1.0 score: 1.0
context: ["BCE", "TVA", "intracommunautaire"] context: ["BCE", "TVA", "intracommunautaire"]
- name: EmailRecognizer - name: EmailRecognizer
supported_language: fr supported_language: fr
supported_entity: EMAIL_ADDRESS supported_entity: EMAIL_ADDRESS
@@ -127,9 +127,10 @@ recognizer_registry:
score: 0.95 score: 0.95
context: ["sécurité sociale", "insee", "nir"] context: ["sécurité sociale", "insee", "nir"]
# 3. LISTE D'EXCLUSION (ALLOW LIST) # 3. LISTE D'EXCLUSION
# ===================================================================== # =====================================================================
allow_list: allow_list:
- Adresse
- ADRESSE - ADRESSE
- Contrat - Contrat
- Document - Document
@@ -171,12 +172,14 @@ allow_list:
- Taux - Taux
- Valeur - Valeur
- Prix - Prix
- Coordonnées
- Témoins
# 4. CONFIGURATION DES TRANSFORMATIONS D'ANONYMISATION # 4. CONFIGURATION DES TRANSFORMATIONS D'ANONYMISATION
# ===================================================================== # =====================================================================
anonymizer_config: anonymizer_config:
default_anonymizers: default_anonymizers:
# Entités de base
PERSON: replace PERSON: replace
LOCATION: replace LOCATION: replace
ORGANIZATION: replace ORGANIZATION: replace
@@ -194,7 +197,6 @@ anonymizer_config:
IP_ADDRESS: replace IP_ADDRESS: replace
replacements: replacements:
# Remplacements de base
PERSON: "<PERSONNE>" PERSON: "<PERSONNE>"
LOCATION: "<LIEU>" LOCATION: "<LIEU>"
ORGANIZATION: "<ORGANISATION>" ORGANIZATION: "<ORGANISATION>"
@@ -206,7 +208,6 @@ anonymizer_config:
BE_NATIONAL_REGISTER_NUMBER: "<NRN_BELGE>" BE_NATIONAL_REGISTER_NUMBER: "<NRN_BELGE>"
FR_SOCIAL_SECURITY_NUMBER: "<NUM_SECU_FR>" FR_SOCIAL_SECURITY_NUMBER: "<NUM_SECU_FR>"
BE_PHONE_NUMBER: "<TELEPHONE_BE>" BE_PHONE_NUMBER: "<TELEPHONE_BE>"
# Ajout des remplacements pour les nouvelles entités SÛRES
FLEXIBLE_DATE: "<DATE>" FLEXIBLE_DATE: "<DATE>"
BE_ADDRESS: "<ADRESSE_BELGE>" BE_ADDRESS: "<ADRESSE_BELGE>"
BE_PRO_ID: "<ID_PROFESSIONNEL>" BE_PRO_ID: "<ID_PROFESSIONNEL>"