Actualiser conf/default.yaml
This commit is contained in:
@@ -36,8 +36,7 @@ recognizer_registry:
|
|||||||
load_predefined_recognizers: true
|
load_predefined_recognizers: true
|
||||||
|
|
||||||
recognizers:
|
recognizers:
|
||||||
# --- On déclare le détecteur NLP de base ---
|
# --- détecteur de dates flexibles ---
|
||||||
|
|
||||||
- name: FlexibleDateRecognizer
|
- name: FlexibleDateRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: FLEXIBLE_DATE
|
supported_entity: FLEXIBLE_DATE
|
||||||
@@ -50,6 +49,7 @@ recognizer_registry:
|
|||||||
score: 1.0
|
score: 1.0
|
||||||
context: ["date", "né le", "signé le", "incident du"]
|
context: ["date", "né le", "signé le", "incident du"]
|
||||||
|
|
||||||
|
# --- adresse belge complète ---
|
||||||
- name: BelgianAddressRecognizer
|
- name: BelgianAddressRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: BE_ADDRESS
|
supported_entity: BE_ADDRESS
|
||||||
@@ -59,6 +59,7 @@ recognizer_registry:
|
|||||||
score: 1.0
|
score: 1.0
|
||||||
context: ["demeurant", "adresse", "siège social", "bureaux situés"]
|
context: ["demeurant", "adresse", "siège social", "bureaux situés"]
|
||||||
|
|
||||||
|
# --- numéro téléphone belge ---
|
||||||
- name: BelgianPhoneRecognizer
|
- name: BelgianPhoneRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: BE_PHONE_NUMBER
|
supported_entity: BE_PHONE_NUMBER
|
||||||
@@ -68,6 +69,7 @@ recognizer_registry:
|
|||||||
score: 0.95
|
score: 0.95
|
||||||
context: ["Tel", "Tél", "téléphone", "gsm", "mobile"]
|
context: ["Tel", "Tél", "téléphone", "gsm", "mobile"]
|
||||||
|
|
||||||
|
# --- organisation avec forme légale ---
|
||||||
- name: SmartOrganizationRecognizer
|
- name: SmartOrganizationRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: ORGANIZATION
|
supported_entity: ORGANIZATION
|
||||||
@@ -80,6 +82,7 @@ recognizer_registry:
|
|||||||
score: 0.9
|
score: 0.9
|
||||||
context: ["société", "entreprise", "gérant de la"]
|
context: ["société", "entreprise", "gérant de la"]
|
||||||
|
|
||||||
|
# --- numéro professionnel IEC ---
|
||||||
- name: ProfessionalIdRecognizer
|
- name: ProfessionalIdRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: BE_PRO_ID
|
supported_entity: BE_PRO_ID
|
||||||
@@ -89,6 +92,7 @@ recognizer_registry:
|
|||||||
score: 1.0
|
score: 1.0
|
||||||
context: ["expert-comptable"]
|
context: ["expert-comptable"]
|
||||||
|
|
||||||
|
# --- numéro BCE/TVA belge ---
|
||||||
- name: BelgianEnterpriseRecognizer
|
- name: BelgianEnterpriseRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: BE_ENTERPRISE_NUMBER
|
supported_entity: BE_ENTERPRISE_NUMBER
|
||||||
@@ -98,21 +102,37 @@ recognizer_registry:
|
|||||||
score: 1.0
|
score: 1.0
|
||||||
context: ["BCE", "TVA", "intracommunautaire"]
|
context: ["BCE", "TVA", "intracommunautaire"]
|
||||||
|
|
||||||
|
# --- Email ---
|
||||||
- name: EmailRecognizer
|
- name: EmailRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: EMAIL_ADDRESS
|
supported_entity: EMAIL_ADDRESS
|
||||||
patterns: [{name: Email Pattern, regex: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", score: 1.0}]
|
patterns:
|
||||||
|
- name: Email Pattern
|
||||||
|
# TLD class is letters only: inside [...] a "|" is a literal pipe, not alternation.
regex: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
|
||||||
|
score: 1.0
|
||||||
context: ["email", "courriel", "mail"]
|
context: ["email", "courriel", "mail"]
|
||||||
|
|
||||||
|
# --- IBAN ---
|
||||||
- name: IbanRecognizer
|
- name: IbanRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: IBAN
|
supported_entity: IBAN
|
||||||
patterns: [{name: IBAN Pattern, regex: "\\b[A-Z]{2}[0-9]{2}\\s?(?:[A-Z0-9]{4}\\s?){2,7}[A-Z0-9]{1,4}\\b", score: 0.95}]
|
patterns:
|
||||||
|
- name: IBAN Pattern
|
||||||
|
regex: "\\b[A-Z]{2}[0-9]{2}\\s?(?:[A-Z0-9]{4}\\s?){2,7}[A-Z0-9]{1,4}\\b"
|
||||||
|
score: 0.95
|
||||||
context: ["iban", "compte"]
|
context: ["iban", "compte"]
|
||||||
|
|
||||||
|
# --- Numéro registre national belge ---
|
||||||
- name: BelgianNRNRecognizer
|
- name: BelgianNRNRecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: BE_NATIONAL_REGISTER_NUMBER
|
supported_entity: BE_NATIONAL_REGISTER_NUMBER
|
||||||
patterns: [{name: NRN Pattern, regex: "\\b[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}-[0-9]{3}\\.[0-9]{2}\\b", score: 1.0}]
|
patterns:
|
||||||
|
- name: NRN Pattern
|
||||||
|
regex: "\\b[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}-[0-9]{3}\\.[0-9]{2}\\b"
|
||||||
|
score: 1.0
|
||||||
context: ["registre national"]
|
context: ["registre national"]
|
||||||
|
|
||||||
|
# --- Numéro sécurité sociale France (INSEE) ---
|
||||||
- name: FrenchINSEERecognizer
|
- name: FrenchINSEERecognizer
|
||||||
supported_language: fr
|
supported_language: fr
|
||||||
supported_entity: FR_SOCIAL_SECURITY_NUMBER
|
supported_entity: FR_SOCIAL_SECURITY_NUMBER
|
||||||
@@ -122,6 +142,19 @@ recognizer_registry:
|
|||||||
score: 0.95
|
score: 0.95
|
||||||
context: ["sécurité sociale", "insee", "nir"]
|
context: ["sécurité sociale", "insee", "nir"]
|
||||||
|
|
||||||
|
# --- Adresse IP (IPv4 et IPv6) ---
|
||||||
|
- name: IpAddressRecognizer
|
||||||
|
supported_language: fr
|
||||||
|
supported_entity: IP_ADDRESS
|
||||||
|
patterns:
|
||||||
|
- name: IPv4
|
||||||
|
regex: "\\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\b"
|
||||||
|
score: 1.0
|
||||||
|
- name: IPv6
|
||||||
|
regex: "\\b([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b"
|
||||||
|
score: 0.9
|
||||||
|
context: ["adresse ip", "ip", "serveur", "exposé"]
|
||||||
|
|
||||||
# 3. LISTE D'EXCLUSION
|
# 3. LISTE D'EXCLUSION
|
||||||
# =====================================================================
|
# =====================================================================
|
||||||
allow_list:
|
allow_list:
|
||||||
@@ -169,9 +202,12 @@ allow_list:
|
|||||||
- Prix
|
- Prix
|
||||||
- Coordonnées
|
- Coordonnées
|
||||||
- Témoins
|
- Témoins
|
||||||
- "Coordonnées bancaires"
|
- Coordonnées bancaires
|
||||||
- "Témoins clés"
|
- Témoins clés
|
||||||
|
- montrent
|
||||||
|
- montrent des
|
||||||
|
- montrent des irrégularités
|
||||||
|
- bénéficiaire
|
||||||
|
|
||||||
# 4. CONFIGURATION DES TRANSFORMATIONS D'ANONYMISATION
|
# 4. CONFIGURATION DES TRANSFORMATIONS D'ANONYMISATION
|
||||||
# =====================================================================
|
# =====================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user