Actualiser conf/default.yaml
This commit is contained in:
@@ -30,13 +30,12 @@ nlp_configuration:
|
||||
PERSON: 0.85
|
||||
ORGANIZATION: 0.55
|
||||
|
||||
# 2. CONFIGURATION DU REGISTRE DES DÉTECTEURS (AVEC L'AJOUT POUR LES WARNINGS)
|
||||
# 2. CONFIGURATION DU REGISTRE DES DÉTECTEURS
|
||||
# =====================================================================
|
||||
recognizer_registry:
|
||||
load_predefined_recognizers: true
|
||||
|
||||
recognizers:
|
||||
# --- détecteur de dates flexibles ---
|
||||
- name: FlexibleDateRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: FLEXIBLE_DATE
|
||||
@@ -49,7 +48,6 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["date", "né le", "signé le", "incident du"]
|
||||
|
||||
# --- adresse belge complète ---
|
||||
- name: BelgianAddressRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: BE_ADDRESS
|
||||
@@ -59,7 +57,6 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["demeurant", "adresse", "siège social", "bureaux situés"]
|
||||
|
||||
# --- numéro téléphone belge ---
|
||||
- name: BelgianPhoneRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: BE_PHONE_NUMBER
|
||||
@@ -69,7 +66,6 @@ recognizer_registry:
|
||||
score: 0.95
|
||||
context: ["Tel", "Tél", "téléphone", "gsm", "mobile"]
|
||||
|
||||
# --- organisation avec forme légale ---
|
||||
- name: SmartOrganizationRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: ORGANIZATION
|
||||
@@ -82,7 +78,6 @@ recognizer_registry:
|
||||
score: 0.9
|
||||
context: ["société", "entreprise", "gérant de la"]
|
||||
|
||||
# --- numéro professionnel IEC ---
|
||||
- name: ProfessionalIdRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: BE_PRO_ID
|
||||
@@ -92,7 +87,6 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["expert-comptable"]
|
||||
|
||||
# --- numéro BCE/TVA belge ---
|
||||
- name: BelgianEnterpriseRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: BE_ENTERPRISE_NUMBER
|
||||
@@ -102,7 +96,6 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["BCE", "TVA", "intracommunautaire"]
|
||||
|
||||
# --- Email ---
|
||||
- name: EmailRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: EMAIL_ADDRESS
|
||||
@@ -112,17 +105,16 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["email", "courriel", "mail"]
|
||||
|
||||
# --- IBAN ---
|
||||
- name: IbanRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: IBAN
|
||||
patterns:
|
||||
- name: IBAN Pattern
|
||||
regex: "\\b[A-Z]{2}[0-9]{2}\\s?(?:[A-Z0-9]{4}\\s?){2,7}[A-Z0-9]{1,4}\\b"
|
||||
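# Remplacement de \s? par un \s obligatoire pour éviter un recouvrement trop large.
# NOTE(review) : ce motif exige au moins 4 groupes de 4 caractères séparés par un espace après le préfixe ; un IBAN sans espaces ou un IBAN belge (16 caractères, soit 3 groupes) ne correspond donc plus — à confirmer.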
|
||||
regex: "\\b[A-Z]{2}[0-9]{2}(?:\\s[A-Z0-9]{4}){4,7}\\b"
|
||||
score: 0.95
|
||||
context: ["iban", "compte"]
|
||||
|
||||
# --- Numéro registre national belge ---
|
||||
- name: BelgianNRNRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: BE_NATIONAL_REGISTER_NUMBER
|
||||
@@ -132,7 +124,6 @@ recognizer_registry:
|
||||
score: 1.0
|
||||
context: ["registre national"]
|
||||
|
||||
# --- Numéro sécurité sociale France (INSEE) ---
|
||||
- name: FrenchINSEERecognizer
|
||||
supported_language: fr
|
||||
supported_entity: FR_SOCIAL_SECURITY_NUMBER
|
||||
@@ -142,7 +133,6 @@ recognizer_registry:
|
||||
score: 0.95
|
||||
context: ["sécurité sociale", "insee", "nir"]
|
||||
|
||||
# --- Adresse IP (IPv4 et IPv6) ---
|
||||
- name: IpAddressRecognizer
|
||||
supported_language: fr
|
||||
supported_entity: IP_ADDRESS
|
||||
@@ -153,7 +143,8 @@ recognizer_registry:
|
||||
- name: IPv6
|
||||
regex: "\\b([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b"
|
||||
score: 0.9
|
||||
context: ["adresse ip", "ip", "serveur", "exposé"]
|
||||
# On retire le contexte pour plus de souplesse
|
||||
# context: ["adresse ip", "ip", "serveur", "exposé"]
|
||||
|
||||
# 3. LISTE D'EXCLUSION
|
||||
# =====================================================================
|
||||
@@ -202,12 +193,12 @@ allow_list:
|
||||
- Prix
|
||||
- Coordonnées
|
||||
- Témoins
|
||||
- Coordonnées bancaires
|
||||
- Témoins clés
|
||||
- montrent
|
||||
- montrent des
|
||||
- montrent des irrégularités
|
||||
- bénéficiaire
|
||||
- "Coordonnées bancaires"
|
||||
- "Témoins clés"
|
||||
- "montrent"
|
||||
- "montrent des"
|
||||
- "montrent des irrégularités"
|
||||
- "bénéficiaire"
|
||||
|
||||
# 4. CONFIGURATION DES TRANSFORMATIONS D'ANONYMISATION
|
||||
# =====================================================================
|
||||
|
||||
Reference in New Issue
Block a user