presidio modulaire
This commit is contained in:
20
conf/recognizers/PII/generic/bank_accounts.yaml
Normal file
20
conf/recognizers/PII/generic/bank_accounts.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# IBAN recognizer for the French-language pipeline.
# The generic patterns cover any country; the country-specific ones pin the
# exact national length and layout.
recognizer_registry:
  recognizers:
    - name: IbanRecognizer
      supported_language: fr
      supported_entity: IBAN
      patterns:
        # Printed form: country code + check digits, then space-separated
        # groups of four. The optional last group of 1-3 characters covers
        # IBAN lengths that are not a multiple of four (DE, NL, GB, FR, IT...);
        # without it the match stopped before the final group.
        # Trade-off: a 1-3 character token right after a complete IBAN is
        # absorbed into the match.
        - name: IBAN avec espaces
          regex: "\\b[A-Z]{2}[0-9]{2}(?:\\s[0-9A-Z]{4}){3,7}(?:\\s[0-9A-Z]{1,3})?\\b"
          score: 1.0
        # Electronic form: no separators at all.
        - name: IBAN compact
          regex: "\\b[A-Z]{2}[0-9]{2}[0-9A-Z]{12,30}\\b"
          score: 0.9
        # Belgian IBAN: 16 characters, BE + 2 check digits + 12 digits.
        - name: IBAN belge spécifique
          regex: "\\bBE[0-9]{2}\\s?[0-9]{4}\\s?[0-9]{4}\\s?[0-9]{4}\\b"
          score: 0.95
        # French IBAN: 27 characters, FR + 2 check digits + 5-digit bank code
        # + 5-digit branch code + 11 alphanumeric account characters
        # + 2-digit key. The previous pattern only described 23 characters
        # and therefore never matched a complete French IBAN.
        - name: IBAN français spécifique
          regex: "\\bFR[0-9]{2}\\s?[0-9]{4}\\s?[0-9]{4}\\s?[0-9]{2}[0-9A-Z]{2}\\s?[0-9A-Z]{4}\\s?[0-9A-Z]{4}\\s?[0-9A-Z][0-9]{2}\\b"
          score: 0.95
      # Nearby words that support an IBAN match.
      context: ["iban", "compte", "bancaire", "virement", "RIB"]
|
||||
14
conf/recognizers/PII/generic/biometric_data.yaml
Normal file
14
conf/recognizers/PII/generic/biometric_data.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Biometric-data recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: BiometricDataRecognizer
      supported_language: fr
      supported_entity: BIOMETRIC_DATA
      patterns:
        # Keywords naming a biometric modality (fingerprint, facial
        # recognition, iris/retina scan, DNA, genetic profile).
        - name: Données biométriques
          regex: "\\b(?:empreinte(?:s)?\\s+digitale(?:s)?|reconnaissance\\s+faciale|scan\\s+(?:iris|rétine)|biométrie|ADN|profil\\s+génétique)\\b"
          score: 0.95
        # "template biométrique" / "hash biométrique" followed by at least
        # 32 hexadecimal characters.
        - name: Identifiants biométriques
          regex: "\\b(?:template|hash)\\s+biométrique\\s*:?\\s*[A-F0-9]{32,}\\b"
          score: 0.9
      # Nearby words that support a biometric match.
      context:
        - "biométrie"
        - "empreinte"
        - "reconnaissance"
        - "scan"
        - "identification"
|
||||
74
conf/recognizers/PII/generic/dates.yaml
Normal file
74
conf/recognizers/PII/generic/dates.yaml
Normal file
@@ -0,0 +1,74 @@
|
||||
# Date and time recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: DateTimeRecognizer
      supported_language: fr
      supported_entity: DATE_TIME
      patterns:
        # Standard French numeric formats with different separators.
        - name: Date française DD/MM/YYYY
          regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])/(?:0?[1-9]|1[0-2])/(?:19|20)\\d{2}\\b"
          score: 0.95

        - name: Date française DD-MM-YYYY
          regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])-(?:0?[1-9]|1[0-2])-(?:19|20)\\d{2}\\b"
          score: 0.95

        - name: Date française DD MM YYYY (espaces)
          regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])\\s+(?:0?[1-9]|1[0-2])\\s+(?:19|20)\\d{2}\\b"
          score: 0.9

        - name: Date ISO YYYY-MM-DD
          regex: "\\b(?:19|20)\\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12][0-9]|3[01])\\b"
          score: 0.98

        # Dates with the month spelled out. "1er" is the usual French form
        # for the first day of the month, and unaccented month spellings
        # (fevrier, aout, decembre) are accepted as well.
        - name: Date avec mois en lettres
          regex: "\\b(?:1er|0?[1-9]|[12][0-9]|3[01])\\s+(?:janvier|f[ée]vrier|mars|avril|mai|juin|juillet|ao[uû]t|septembre|octobre|novembre|d[ée]cembre)\\s+(?:19|20)\\d{2}\\b"
          score: 0.99

        # Belgian format DD.MM.YYYY.
        - name: Date belge DD.MM.YYYY
          regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])\\.(?:0?[1-9]|1[0-2])\\.(?:19|20)\\d{2}\\b"
          score: 0.95

        # Times (HH:MM and HH:MM:SS).
        - name: "Heure HH:MM(:SS)?"
          regex: "\\b(?:[01]?[0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9])?\\b"
          score: 0.85

        # Combined date and time (e.g. 12/05/2023 14:30).
        - name: Date et heure combinées
          regex: "\\b(?:0?[1-9]|[12][0-9]|3[01])[/-](?:0?[1-9]|1[0-2])[/-](?:19|20)\\d{2}\\s+(?:[01]?[0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9])?\\b"
          score: 0.97

        # Bare years, only when introduced by a keyword; the keyword is part
        # of the matched span.
        - name: Année avec contexte
          regex: "\\b(?:en|depuis|année|an|né en|décédé en)\\s+(?:19|20)\\d{2}\\b"
          score: 0.8

      # Nearby words that support a date/time match.
      # NOTE(review): "le", "du" and "au" are very common French words;
      # confirm they are intended as context triggers.
      context:
        - "date"
        - "né le"
        - "née le"
        - "naissance"
        - "décès"
        - "décédé le"
        - "le"
        - "du"
        - "au"
        - "depuis"
        - "jusqu'au"
        - "entre"
        - "avant"
        - "après"
        - "heure"
        - "horaire"
        - "rendez-vous"
        - "réunion"
        - "événement"
        - "signature"
        - "signé le"
        - "établi le"
        - "fait le"
|
||||
18
conf/recognizers/PII/generic/emails.yaml
Normal file
18
conf/recognizers/PII/generic/emails.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
# Email-address recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: EmailRecognizer
      supported_language: fr
      supported_entity: EMAIL_ADDRESS
      patterns:
        # The top-level-domain class is [A-Za-z]: inside a character class
        # "|" is a literal pipe, not an alternation, so the former
        # [A-Z|a-z] also accepted "|" as part of the domain.
        - name: Email standard
          regex: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
          score: 1.0
        # Obfuscated form: "user [at] domain [dot] tld".
        - name: Email obfusqué
          regex: "\\b[A-Za-z0-9._%+-]+\\s*\\[at\\]\\s*[A-Za-z0-9.-]+\\s*\\[dot\\]\\s*[A-Za-z]{2,}\\b"
          score: 0.8
        # Obfuscated form: "user (at) domain (point) tld".
        - name: Email avec (at) et (point)
          regex: "\\b[A-Za-z0-9._%+-]+\\s*\\(at\\)\\s*[A-Za-z0-9.-]+\\s*\\(point\\)\\s*[A-Za-z]{2,}\\b"
          score: 0.7
      # Nearby words that support an email match.
      context:
        ["email", "courriel", "mail", "@", "contact", "adresse électronique"]
|
||||
17
conf/recognizers/PII/generic/financial_data.yaml
Normal file
17
conf/recognizers/PII/generic/financial_data.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# GDPR financial-data recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: RGPDFinancialDataRecognizer
      supported_language: fr
      supported_entity: RGPD_FINANCIAL_DATA
      patterns:
        # Contiguous card numbers: Visa (13 or 16 digits), Mastercard
        # (51-55 prefix, 16 digits), American Express (34/37 prefix, 15 digits).
        - name: Numéro de carte bancaire
          regex: "\\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\\b"
          score: 1.0
        # Same card families written in groups separated by a space or a
        # hyphen (4-4-4-4 for Visa/Mastercard, 4-6-5 for American Express),
        # which the contiguous pattern above cannot match.
        - name: Numéro de carte bancaire (groupé)
          regex: "\\b(?:(?:4[0-9]{3}|5[1-5][0-9]{2})(?:[ -][0-9]{4}){3}|3[47][0-9]{2}[ -][0-9]{6}[ -][0-9]{5})\\b"
          score: 0.95
        # Security code introduced by its label.
        - name: Code de sécurité
          regex: "\\b(?:CVV|CVC|cryptogramme)\\s*:?\\s*[0-9]{3,4}\\b"
          score: 0.95
        # Income keyword (singular or plural) followed by an amount.
        # The amount accepts digits with or without thousands separators
        # ("2500", "2 500", "2.500"), an optional 1-2 digit decimal part, and
        # an optional trailing euro sign. The final alternative replaces the
        # former "€?\b", which could never include the euro sign because
        # there is no word boundary after it.
        - name: Revenus
          regex: "\\b(?:salaires?|revenus?|rémunérations?)\\s*:?\\s*[0-9]+(?:[\\s.,][0-9]{3})*(?:[.,][0-9]{1,2})?(?:\\s*€|\\b)"
          score: 0.8
      # Nearby words that support a financial-data match.
      context: ["financier", "bancaire", "carte", "paiement", "salaire"]
|
||||
17
conf/recognizers/PII/generic/health_data.yaml
Normal file
17
conf/recognizers/PII/generic/health_data.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# Health-data recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: HealthDataRecognizer
      supported_language: fr
      supported_entity: HEALTH_DATA
      patterns:
        # Generic medical vocabulary; the optional "s" lets plural forms
        # match, which the closing word boundary previously rejected.
        - name: Informations médicales
          regex: "\\b(?:maladie|pathologie|diagnostic|traitement|médicament|hospitalisation|chirurgie|opération|allergie|antécédent|symptôme)s?\\b"
          score: 0.85
        # Named sensitive conditions (singular or plural).
        - name: Données médicales sensibles
          regex: "\\b(?:VIH|SIDA|cancer|diabète|dépression|schizophrénie|bipolarité|addiction)s?\\b"
          score: 0.95
        # Professional title followed by a capitalised surname. The surname
        # accepts accented Latin-1 letters and hyphenated parts
        # ("Dr Lefèvre", "Docteur Saint-Martin"); "Dr." with a period and the
        # feminine "Infirmière" are accepted too.
        - name: Professionnels de santé
          regex: "\\b(?:Dr\\.?|Docteur|Médecin|Infirmier|Infirmière|Psychiatre|Psychologue)\\s+[A-ZÀ-ÖØ-Þ][a-zß-öø-ÿ]+(?:-[A-ZÀ-ÖØ-Þ][a-zß-öø-ÿ]+)*\\b"
          score: 0.9
      # Nearby words that support a health-data match.
      context: ["santé", "médical", "hôpital", "clinique", "patient", "dossier médical"]
|
||||
17
conf/recognizers/PII/generic/ip_addresses.yaml
Normal file
17
conf/recognizers/PII/generic/ip_addresses.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# IP-address recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: IpAddressRecognizer
      supported_language: fr
      supported_entity: IP_ADDRESS
      patterns:
        # Dotted quad, each octet limited to 0-255.
        - name: IPv4
          regex: "\\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\b"
          score: 1.0
        # Full form: eight colon-separated groups of 1-4 hex digits.
        - name: IPv6
          regex: "\\b([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b"
          score: 0.9
        # Compressed form with "::" between two hex groups.
        - name: IPv6 compressé
          regex: "\\b([0-9a-fA-F]{1,4}:){1,7}:([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}\\b"
          score: 0.85
      # Nearby words that support an IP-address match.
      context:
        - "IP"
        - "adresse"
        - "serveur"
        - "réseau"
        - "connexion"
|
||||
11
conf/recognizers/PII/generic/political_opinions.yaml
Normal file
11
conf/recognizers/PII/generic/political_opinions.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Political-opinions recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: PoliticalOpinionsRecognizer
      supported_language: fr
      supported_entity: POLITICAL_OPINIONS
      patterns:
        # Keywords in singular or plural, plus the feminine "électrice(s)";
        # the closing word boundary previously rejected every inflected
        # form ("votes", "électeurs", "partis politiques").
        - name: Opinions politiques
          regex: "\\b(?:votes?|électeurs?|électrices?|partis?\\s+politiques?|opinions?\\s+politiques?)\\b"
          score: 0.7
      # Nearby words that support a political-opinion match.
      context: ["données sensibles", "RGPD", "politique", "privé"]
|
||||
11
conf/recognizers/PII/generic/sexual_orientation.yaml
Normal file
11
conf/recognizers/PII/generic/sexual_orientation.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Sexual-orientation recognizer for the French-language pipeline.
recognizer_registry:
  recognizers:
    - name: SexualOrientationRecognizer
      supported_language: fr
      supported_entity: SEXUAL_ORIENTATION
      patterns:
        # Adjectives in masculine/feminine and singular/plural
        # ("homosexuel", "homosexuelle", "homosexuels", "homosexuelles"),
        # plus the phrase "orientation(s) sexuelle(s)". The closing word
        # boundary previously rejected every inflected form.
        - name: Orientation sexuelle
          regex: "\\b(?:(?:homosexuel|hétérosexuel|bisexuel)(?:le)?s?|orientations?\\s+sexuelles?)\\b"
          score: 0.9
      # Nearby words that support a sexual-orientation match.
      context: ["données sensibles", "RGPD", "orientation", "privé"]
|
||||
Reference in New Issue
Block a user