presidio modulaire

This commit is contained in:
nBiqoz
2025-09-07 12:29:08 +02:00
parent 85d95d05e5
commit c62e5b92d5
42 changed files with 1802 additions and 324 deletions

View File

@@ -0,0 +1,17 @@
# Pattern-based recognizer for French SIRET / SIREN company identifiers.
# Every pattern below is declared for language "fr" and reports its matches
# under the single entity name FR_SIRET (including the shorter SIREN form).
recognizer_registry:
  recognizers:
    - name: FrenchSIRETRecognizer
      supported_language: fr
      supported_entity: FR_SIRET
      patterns:
        # 14 digits grouped 3-3-3-5; each group boundary accepts zero or one
        # whitespace character, so both compact and spaced forms match.
        - name: SIRET complet
          regex: "\\b[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{5}\\b"
          score: 0.9
        # 9 digits grouped 3-3-3 with the same optional whitespace.
        # NOTE(review): this matches any free-standing 9-digit number; confirm
        # that 0.85 is the intended score for such a broad pattern.
        - name: SIREN
          regex: "\\b[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{3}\\b"
          score: 0.85
        # Literal "SIRET" label, optional colon, then 14 digits where the
        # whitespace between groups is mandatory. The label itself is part of
        # the matched text.
        - name: SIRET avec espaces
          regex: "\\bSIRET\\s*:?\\s*[0-9]{3}\\s[0-9]{3}\\s[0-9]{3}\\s[0-9]{5}\\b"
          score: 0.95
      # Words associated with this recognizer; presumably used by the
      # consuming analyzer as surrounding-context hints -- confirm against
      # the loader.
      context:
        - "SIRET"
        - "SIREN"
        - "établissement"
        - "entreprise"
        - "société"