doublon done

This commit is contained in:
nBiqoz
2025-09-12 16:55:13 +02:00
parent c62e5b92d5
commit 3e70181b58
6 changed files with 32 additions and 9 deletions

View File

@@ -10,6 +10,7 @@ allow_list:
- TVA
- IEC
- expert-comptable
- prestataire
# Termes financiers
- Euro
- EUR

View File

@@ -5,7 +5,7 @@ anonymizer_config:
PERSON: replace
LOCATION: replace
ORGANIZATION: replace
DATE_TIME: replace
DATE: replace
MONEY: replace
EMAIL_ADDRESS: replace
IBAN: replace
@@ -45,11 +45,18 @@ anonymizer_config:
PERSON: "[PERSONNE]"
LOCATION: "[LIEU]"
ORGANIZATION: "[ORGANISATION]"
DATE_TIME: "[DATE]"
DATE: "[DATE]"
MONEY: "[MONTANT]"
EMAIL_ADDRESS: "[EMAIL]"
IBAN: "[IBAN]"
IP_ADDRESS: "[ADRESSE_IP]"
# PII Belges
BE_ENTERPRISE_NUMBER: "[ENTREPRISE_BELGE]"
BE_PHONE_NUMBER: "[TELEPHONE_BELGE]"
BE_ADDRESS: "[ADRESSE_BELGE]"
BE_ID_CARD: "[CARTE_ID_BELGE]"
BE_PASSPORT: "[PASSEPORT_BELGE]"
# PII Génériques - Données sensibles RGPD
HEALTH_DATA: "[DONNEES_SANTE]"

View File

@@ -15,9 +15,9 @@ nlp_configuration:
ORGANIZATION: ORGANIZATION
LOC: LOCATION
LOCATION: LOCATION
DATE: DATE_TIME
TIME: DATE_TIME
MISC: DATE_TIME
DATE: DATE
TIME: TIME
MISC: MISC
labels_to_ignore:
- LOCATION
- MISC
@@ -26,7 +26,6 @@ nlp_configuration:
- LANGUAGE
- LAW
- ORDINAL
- PERCENT
- PRODUCT
- QUANTITY
- WORK_OF_ART

View File

@@ -2,7 +2,7 @@ recognizer_registry:
recognizers:
- name: DateTimeRecognizer
supported_language: fr
supported_entity: DATE_TIME
supported_entity: DATE
patterns:
# Formats français standards avec différents séparateurs
- name: Date française DD/MM/YYYY
@@ -13,6 +13,23 @@ recognizer_registry:
regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])-(?:0?[1-9]|1[0-2])-(?:19|20)\\d{2}\\b"
score: 0.95
# Formats courts avec année sur deux chiffres (DD-MM-YY, DD/MM/YY, DD.MM.YY)
- name: Date française DD-MM-YY
regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])-(?:0?[1-9]|1[0-2])-\\d{2}\\b"
score: 0.90
- name: Date française DD/MM/YY
regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])/(?:0?[1-9]|1[0-2])/\\d{2}\\b"
score: 0.90
- name: Date belge DD.MM.YY
regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])\\.(?:0?[1-9]|1[0-2])\\.\\d{2}\\b"
score: 0.90
- name: Date courte
regex: "\\b\\d{1,2}[-/.]\\d{1,2}[-/.]\\d{2}\\b"
score: 0.85
- name: Date française DD MM YYYY (espaces)
regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])\\s+(?:0?[1-9]|1[0-2])\\s+(?:19|20)\\d{2}\\b"
score: 0.9