presidio modulaire
This commit is contained in:
14
conf/recognizers/PII/french/addresses.yaml
Normal file
14
conf/recognizers/PII/french/addresses.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Recognizer for French postal addresses.
recognizer_registry:
  recognizers:
    - name: FrenchAddressRecognizer
      supported_language: fr
      supported_entity: FR_ADDRESS
      patterns:
        # Full address: street number (1-4 digits), optional bis/ter/quater
        # suffix, street type, street name, a comma, a five-digit postal code
        # and a city name.
        # The suffix may be glued to the number ("12bis rue ...") or separated
        # from it by one whitespace character ("12 bis rue ...").
        - name: Adresse française complète
          regex: "\\b\\d{1,4}\\s?(?:bis|ter|quater)?\\s+(?:rue|avenue|boulevard|place|impasse|allée|chemin|route)\\s+[A-Za-zà-ÿ\\s'-]+,\\s*[0-9]{5}\\s+[A-Za-zà-ÿ\\s'-]+\\b"
          score: 0.95
        # Bare five-digit postal code.
        # NOTE(review): this matches any standalone five-digit number and the
        # regex itself does not require a context word; confirm that a score
        # of 0.6 is intended for such a weak pattern.
        - name: Code postal français
          regex: "\\b[0-9]{5}\\b"
          score: 0.6
      # Keywords associated with this entity.
      context:
        - "adresse"
        - "domicile"
        - "résidence"
        - "siège social"
|
||||
39
conf/recognizers/PII/french/documents.yaml
Normal file
39
conf/recognizers/PII/french/documents.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# Recognizers for French identity documents: national ID card (CNI),
# passport and driver's licence.
recognizer_registry:
  recognizers:
    # --- National identity card -------------------------------------------
    - name: FrenchIDCardRecognizer
      supported_entity: FR_ID_CARD
      supported_language: fr
      # Keywords associated with this entity.
      context:
        - "carte d'identité"
        - "CNI"
        - "pièce d'identité"
        - "numéro d'identité"
      patterns:
        # Twelve consecutive digits.
        # NOTE(review): FrenchDriverLicenseRecognizer below uses the very same
        # regex, so any 12-digit number matches both entities.
        - name: Numéro CNI nouveau format
          regex: "\\b[0-9]{12}\\b"
          score: 0.85
        # Three groups of four digits, each separated by one whitespace
        # character.
        - name: Numéro CNI avec espaces
          regex: "\\b[0-9]{4}\\s[0-9]{4}\\s[0-9]{4}\\b"
          score: 0.9

    # --- Passport -----------------------------------------------------------
    - name: FrenchPassportRecognizer
      supported_entity: FR_PASSPORT
      supported_language: fr
      # Keywords associated with this entity.
      context:
        - "passeport"
        - "passport"
        - "document de voyage"
      patterns:
        # Two digits, two letters A-Z, five digits, written without separators.
        - name: Numéro de passeport français
          regex: "\\b[0-9]{2}[A-Z]{2}[0-9]{5}\\b"
          score: 0.95
        # Same layout with one whitespace character between the three groups.
        - name: Passeport avec espaces
          regex: "\\b[0-9]{2}\\s[A-Z]{2}\\s[0-9]{5}\\b"
          score: 0.9

    # --- Driver's licence ---------------------------------------------------
    - name: FrenchDriverLicenseRecognizer
      supported_entity: FR_DRIVER_LICENSE
      supported_language: fr
      # Keywords associated with this entity.
      context:
        - "permis de conduire"
        - "permis"
        - "conduire"
        - "licence"
      patterns:
        # Twelve consecutive digits (same regex as the CNI pattern above).
        - name: Permis de conduire français
          regex: "\\b[0-9]{12}\\b"
          score: 0.8
        # Optional "permis [de] conduire[:]" label followed by twelve digits in
        # groups of four, each optionally separated by one whitespace
        # character. When the label is present it is part of the match.
        - name: Permis avec format
          regex: "\\b(?:permis\\s+(?:de\\s+)?conduire\\s*:?\\s*)?[0-9]{4}\\s?[0-9]{4}\\s?[0-9]{4}\\b"
          score: 0.9
|
||||
26
conf/recognizers/PII/french/financial.yaml
Normal file
26
conf/recognizers/PII/french/financial.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Recognizers for French financial identifiers: tax number and bank account
# (RIB).
recognizer_registry:
  recognizers:
    - name: FrenchTaxIDRecognizer
      supported_language: fr
      supported_entity: FR_TAX_ID
      patterns:
        # Thirteen consecutive digits.
        - name: Numéro fiscal français
          regex: "\\b[0-9]{13}\\b"
          score: 0.85
        # Optional "numéro fiscal[:]" label followed by thirteen digits grouped
        # 4-4-5, each group optionally separated by one whitespace character.
        # When the label is present it is part of the match.
        - name: Référence fiscale
          regex: "\\b(?:numéro\\s+fiscal\\s*:?\\s*)?[0-9]{4}\\s?[0-9]{4}\\s?[0-9]{5}\\b"
          score: 0.9
      # Keywords associated with this entity.
      context:
        - "numéro fiscal"
        - "référence fiscale"
        - "impôts"
        - "SIP"

    - name: FrenchBankAccountRecognizer
      supported_language: fr
      supported_entity: FR_BANK_ACCOUNT
      patterns:
        # RIB written as four whitespace-separated groups: bank code
        # (5 digits), branch code (5 digits), account number (11 characters)
        # and key (2 digits).
        # The account number accepts letters as well as digits, because French
        # account numbers are alphanumeric.
        - name: RIB français
          regex: "\\b[0-9]{5}\\s[0-9]{5}\\s[0-9A-Za-z]{11}\\s[0-9]{2}\\b"
          score: 0.95
        # Same 23-character RIB written without separators, optionally
        # preceded by a "compte[:]" label (included in the match when present).
        - name: Numéro de compte
          regex: "\\b(?:compte\\s*:?\\s*)?[0-9]{5}[0-9]{5}[0-9A-Za-z]{11}[0-9]{2}\\b"
          score: 0.9
      # Keywords associated with this entity.
      context:
        - "RIB"
        - "compte bancaire"
        - "numéro de compte"
        - "relevé"
|
||||
18
conf/recognizers/PII/french/social_security.yaml
Normal file
18
conf/recognizers/PII/french/social_security.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
# Recognizer for French social security numbers (INSEE number / NIR).
# All three patterns describe the same 15-character layout:
#   sex (1 or 2) · year (2) · month (2) · department (2) · commune (3) ·
#   order number (3) · key (2)
# The department group accepts the Corsican codes 2A / 2B in every pattern so
# that the three written forms recognise the same set of numbers.
recognizer_registry:
  recognizers:
    - name: FrenchINSEERecognizer
      supported_language: fr
      supported_entity: FR_SOCIAL_SECURITY_NUMBER
      patterns:
        # Groups separated by optional whitespace; the month is restricted to
        # 01-12 in this pattern only.
        - name: INSEE complet avec espaces
          regex: "\\b[12]\\s*[0-9]{2}\\s*(?:0[1-9]|1[0-2])\\s*(?:2[ABab]|[0-9]{2})\\s*[0-9]{3}\\s*[0-9]{3}[\\s]?[0-9]{2}\\b"
          score: 0.95
        # No separators at all: 1 + 4 (year, month) + 2 (department) +
        # 6 (commune, order number) + 2 (key) = 15 characters.
        - name: NIR compact
          regex: "\\b[12][0-9]{4}(?:2[ABab]|[0-9]{2})[0-9]{6}[0-9]{2}\\b"
          score: 0.85
        # Groups separated by hyphens.
        - name: INSEE avec tirets
          regex: "\\b[12]-[0-9]{2}-[0-9]{2}-(?:2[ABab]|[0-9]{2})-[0-9]{3}-[0-9]{3}-[0-9]{2}\\b"
          score: 0.9
      # Keywords associated with this entity.
      context:
        - "sécurité sociale"
        - "insee"
        - "nir"
        - "numéro de sécu"
        - "carte vitale"
|
||||
Reference in New Issue
Block a user