new full
This commit is contained in:
20
conf/recognizers/Business/generic/api_secrets.yaml
Normal file
20
conf/recognizers/Business/generic/api_secrets.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Recognizer for API keys and technical secrets.
# Every pattern expects a keyword, then "=" or ":", then the secret value.
recognizer_registry:
  recognizers:
    - name: APISecretsRecognizer
      supported_language: fr
      supported_entity: CLE_API_SECRETE
      patterns:
        # "API_KEY" / "api key" followed by 16-64 characters from [A-Za-z0-9_-].
        - name: Clé API générique
          regex: "\\b(?:API[_\\s]?KEY|api[_\\s]?key)\\s*[=:]\\s*[A-Za-z0-9\\-_]{16,64}\\b"
          score: 1.0
        # "access_token" or bare "token" followed by 20-128 characters;
        # "." is part of the allowed alphabet here.
        - name: Token d'accès
          regex: "\\b(?:access[_\\s]?token|token)\\s*[=:]\\s*[A-Za-z0-9\\-_\\.]{20,128}\\b"
          score: 0.95
        # AWS secret: exactly 40 characters from the base64 alphabet.
        # The keyword accepts an optional ACCESS and/or KEY suffix so that
        # "aws_secret_access_key" is recognised, not only "aws_secret".
        # The end of the value is guarded by a negative lookahead rather than
        # "\b": a value ending in "/", "+" or "=" has no word boundary after it,
        # so "\b" would reject it.
        - name: Secret AWS
          regex: "\\b(?:AWS[_\\s]?SECRET|aws[_\\s]?secret)(?:[_\\s]?(?:ACCESS|access))?(?:[_\\s]?(?:KEY|key))?\\s*[=:]\\s*[A-Za-z0-9/+=]{40}(?![A-Za-z0-9/+=])"
          score: 1.0
        # "private_key" / "secret_key" followed by 16-64 characters.
        - name: Clé privée
          regex: "\\b(?:private[_\\s]?key|secret[_\\s]?key)\\s*[=:]\\s*[A-Za-z0-9\\-_]{16,64}\\b"
          score: 0.95
      # Context words listed for this recognizer.
      context:
        - "API"
        - "clé"
        - "secret"
        - "token"
        - "authentification"
        - "accès"
|
||||
20
conf/recognizers/Business/generic/contracts_references.yaml
Normal file
20
conf/recognizers/Business/generic/contracts_references.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Recognizer for contract numbers and internal references.
recognizer_registry:
  recognizers:
    - name: ContractReferenceRecognizer
      supported_language: fr
      supported_entity: REFERENCE_CONTRAT
      patterns:
        # "contrat" / "contract", optional "n°" and ":", then 4-15 characters
        # from [A-Z0-9-/].
        - name: Numéro de contrat
          regex: "\\b(?:contrat|contract)\\s*n?°?\\s*:?\\s*[A-Z0-9\\-/]{4,15}\\b"
          score: 0.95
        # "ref" / "référence" / "dossier", then 2-4 letters, an optional "-" or
        # "/", and 4-8 digits.
        - name: Référence interne
          regex: "\\b(?:ref|référence|dossier)\\s*:?\\s*[A-Z]{2,4}[\\-/]?[0-9]{4,8}\\b"
          score: 0.9
        # "transaction ID" / "trans ID" followed by 6-12 alphanumerics.
        - name: ID transaction
          regex: "\\b(?:transaction|trans)\\s*ID\\s*:?\\s*[A-Z0-9]{6,12}\\b"
          score: 0.95
        # "facture" / "invoice", optional "n°" and ":", then 4-12 characters.
        - name: Numéro de facture
          regex: "\\b(?:facture|invoice)\\s*n?°?\\s*:?\\s*[A-Z0-9\\-/]{4,12}\\b"
          score: 0.9
      # Context words listed for this recognizer.
      context:
        - "contrat"
        - "référence"
        - "dossier"
        - "facture"
        - "transaction"
        - "commande"
|
||||
20
conf/recognizers/Business/generic/employee_client_ids.yaml
Normal file
20
conf/recognizers/Business/generic/employee_client_ids.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Recognizer for employee and customer identifiers.
recognizer_registry:
  recognizers:
    - name: EmployeeClientIDRecognizer
      supported_language: fr
      supported_entity: ID_PROFESSIONNEL_BELGE
      patterns:
        # "matricule" / "employee" / "emp", optional ":", then 4-10
        # alphanumerics.
        - name: Matricule employé
          regex: "\\b(?:matricule|employee|emp)\\s*:?\\s*[A-Z0-9]{4,10}\\b"
          score: 0.95
        # "client ID" / "customer ID" followed by 4-12 alphanumerics.
        - name: ID client
          regex: "\\b(?:client|customer)\\s*ID\\s*:?\\s*[A-Z0-9]{4,12}\\b"
          score: 0.95
        # "user" / "utilisateur" followed by a "word.word" login.
        - name: Code utilisateur
          regex: "\\b(?:user|utilisateur)\\s*:?\\s*[a-z]+\\.[a-z]+\\b"
          score: 0.9
        # "RH" / "HR", an optional "-" or "/", then 4-8 digits.
        - name: Identifiant RH
          regex: "\\b(?:RH|HR)[\\-/]?[0-9]{4,8}\\b"
          score: 0.85
      # Context words listed for this recognizer.
      context:
        - "matricule"
        - "employé"
        - "client"
        - "utilisateur"
        - "ID"
        - "identifiant"
|
||||
29
conf/recognizers/Business/generic/financial_amounts.yaml
Normal file
29
conf/recognizers/Business/generic/financial_amounts.yaml
Normal file
@@ -0,0 +1,29 @@
|
||||
# Recognizer for monetary amounts and currencies.
recognizer_registry:
  recognizers:
    - name: FinancialAmountRecognizer
      supported_language: fr
      supported_entity: MONTANT_FINANCIER
      patterns:
        # A number (optionally grouped in thousands by a space, "." or ",",
        # with up to two decimals) followed by a euro marker.
        # There is deliberately no "\b" after "€": the sign is not a word
        # character, so a trailing word boundary only matches when a letter
        # or digit follows it directly and "100 €" would be missed.
        - name: Montant avec devise EUR
          regex: "\\b(?:[0-9]{1,3}(?:[\\s.,][0-9]{3})*|[0-9]+)(?:[.,][0-9]{1,2})?\\s*(?:€|EUR\\b)"
          score: 0.95
        # Same number shape followed by "USD" or "$". The word boundary is
        # kept only after "USD", for the reason given above.
        - name: Montant avec devise USD
          regex: "\\b(?:[0-9]{1,3}(?:[\\s.,][0-9]{3})*|[0-9]+)(?:[.,][0-9]{1,2})?\\s*(?:USD\\b|\\$)"
          score: 0.95
        # Abbreviated amount such as "50k€", "1,5 M€" or "2M USD". A currency
        # marker is required so that plain "5 m" is not treated as an amount.
        # NOTE(review): this regex was empty in the original and has been
        # written from the pattern name - validate against real samples.
        - name: Montant abrégé avec K/M
          regex: "\\b[0-9]+(?:[.,][0-9]{1,2})?\\s?[kKmM]\\s?(?:€|\\$|(?:EUR|USD)\\b)"
          score: 0.9
        # "salaire annuel" / "rémunération annuelle", optional "brut"/"net",
        # optional "de" or ":", then an amount with a currency marker.
        # NOTE(review): this regex was empty in the original and has been
        # written from the pattern name - validate against real samples.
        - name: Salaire annuel
          regex: "\\b(?:salaire|rémunération)\\s+(?:(?:brute?|net(?:te)?)\\s+)?annuel(?:le)?(?:\\s+(?:brute?|net(?:te)?))?\\s*(?:de\\s+|:\\s*)?[0-9]{1,3}(?:[\\s.,]?[0-9]{3})*(?:[.,][0-9]{1,2})?\\s*(?:[kK]\\s?)?(?:€|\\$|(?:EUR|USD|euros?)\\b)"
          score: 0.95
      # Context words listed for this recognizer.
      context:
        - "montant"
        - "prix"
        - "coût"
        - "budget"
        - "salaire"
        - "rémunération"
        - "facture"
|
||||
@@ -5,30 +5,52 @@ recognizer_registry:
|
||||
supported_language: fr
|
||||
supported_entity: MARKET_SHARE
|
||||
patterns:
|
||||
# Pourcentages de marché
|
||||
# Plain percentages (permissive pattern): one or two digits, optional decimal
# part, then "%".
# The end is guarded with a negative lookahead instead of "\b": "%" is not a
# word character, so "%\b" only matches when a letter or digit follows the
# sign and never matches "25% " or "25%." as intended.
- name: Simple Percentage
  regex: "\\b\\d{1,2}(?:[,.]\\d{1,2})?%(?!\\w)"
  score: 0.7
|
||||
|
||||
# Explicit wording: "part de marché" or "parts de marché".
- name: Explicit Market Share
  regex: "\\b(?:part\\s+de\\s+marché|parts?\\s+de\\s+marché)\\b"
  score: 0.9
|
||||
|
||||
# Percentage with market context: optional verb (détient / possède /
# contrôle / représente), optional "environ", the percentage, then
# "de (part de) marché", "du marché" or "de part(s)".
- name: Market Share Percentage
  regex: "\\b(?:détient|possède|contrôle|représente)?\\s*(?:environ\\s+)?(?:\\d{1,2}(?:[,.]\\d{1,2})?%)\\s*(?:de\\s+(?:part\\s+de\\s+)?marché|du\\s+marché|de\\s+parts?)\\b"
  score: 0.9
|
||||
|
||||
|
||||
# Market position: "leader", "numéro N", "N er/ème acteur",
# "position dominante" or "monopole", followed by an optional "du" and
# "marché" or "secteur".
- name: Market Position
  regex: "\\b(?:leader|numéro\\s+\\d+|\\d+(?:er|ème)\\s+acteur|position\\s+dominante|monopole)\\s+(?:du\\s+)?(?:marché|secteur)\\b"
  score: 0.85
|
||||
|
||||
|
||||
# Relative share: a qualifier (majoritaire / minoritaire / principale /
# significative) followed by an optional "part de" and "marché".
- name: Relative Market Share
  regex: "\\b(?:majoritaire|minoritaire|principale|significative)\\s+(?:part\\s+de\\s+)?marché\\b"
  score: 0.8
|
||||
|
||||
|
||||
# Concentration wording: "concentration", "consolidation" or "fusion"
# followed by an optional "du" and "marché".
- name: Market Concentration
  regex: "\\b(?:concentration|consolidation|fusion)\\s+(?:du\\s+)?marché\\b"
  score: 0.75
|
||||
|
||||
|
||||
# Relative revenue: a percentage, an optional "du", then
# "chiffre d'affaires" / "CA" / "revenu(s)", an optional "du", and
# "marché" or "secteur".
- name: Revenue Share
  regex: "\\b(?:\\d{1,2}(?:[,.]\\d{1,2})?%)\\s*(?:du\\s+)?(?:chiffre\\s+d'affaires|CA|revenus?)\\s+(?:du\\s+)?(?:marché|secteur)\\b"
  score: 0.85
|
||||
|
||||
|
||||
# Context words listed for this recognizer.
# The key must carry exactly one list: a single-line list followed by a
# second multi-line list with the same items is not a valid value, so the
# items are written once, in block style.
context:
  - "part de marché"
  - "position concurrentielle"
  - "leader"
  - "concurrent"
  - "secteur"
  - "industrie"
  - "chiffre d'affaires"
  - "revenus"
  - "concentration"
  - "monopole"
  - "oligopole"
|
||||
|
||||
71
conf/recognizers/Business/generic/professional_data.yaml
Normal file
71
conf/recognizers/Business/generic/professional_data.yaml
Normal file
@@ -0,0 +1,71 @@
|
||||
# Recognizers for generic professional data (France/Belgium).
#
# Uppercase-letter class used below: [A-ZÀ-ÖØ-ÞĀ-Ÿ].
# A plain "À-Ÿ" range spans U+00C0-U+0178 and therefore also contains the
# lowercase letters "à"-"ÿ" (and "×"), which defeats any "must start with a
# capital letter" check. The class here excludes U+00D7 and U+00DF-U+00FF and
# keeps the rest of the original range.
recognizer_registry:
  recognizers:
    # Courtesy titles.
    - name: GenericCivilityTitleRecognizer
      supported_language: fr
      supported_entity: TITRE_CIVILITE
      patterns:
        # Abbreviated or professional title; the lookahead requires the next
        # word to start with an uppercase letter without including it in the
        # match.
        - name: Titres de civilité
          regex: "\\b(?:M\\.|Mme|Mlle|Dr\\.|Pr\\.|Prof\\.|Docteur|Professeur|Maître|Me\\.)(?=\\s+[A-ZÀ-ÖØ-ÞĀ-Ÿ])"
          score: 0.9
        # Spelled-out title, with the same uppercase lookahead.
        - name: Titres honorifiques
          regex: "\\b(?:Monsieur|Madame|Mademoiselle)(?=\\s+[A-ZÀ-ÖØ-ÞĀ-Ÿ])"
          score: 0.85
      context:
        - "identité"
        - "titre"
        - "civilité"

    # General professional data.
    - name: GenericProfessionalDataRecognizer
      supported_language: fr
      supported_entity: DONNEES_PROFESSIONNELLES
      patterns:
        # Job keyword followed by lowercase words.
        - name: Titre de poste
          regex: "\\b(?:directeur|directrice|manager|responsable|chef|ingénieur|ingénieure|consultant|consultante)\\s+[a-zà-ÿ\\s]+\\b"
          score: 0.8
        # "département" / "service" / "division" followed by a capitalised
        # name.
        - name: Département
          regex: "\\b(?:département|service|division)\\s+[A-ZÀ-ÖØ-ÞĀ-Ÿ][a-zà-ÿ\\s]+\\b"
          score: 0.75
        # "siège social" / "adresse professionnelle", optional ":", a 1-4
        # digit number, then a capitalised street name.
        - name: Adresse professionnelle
          regex: "\\b(?:siège\\s+social|adresse\\s+professionnelle)\\s*:?\\s*[0-9]{1,4}\\s+[A-ZÀ-ÖØ-ÞĀ-Ÿ][a-zà-ÿ\\s'-]+\\b"
          score: 0.9
        # E-mail address: local part, "@", domain, and a TLD of at least two
        # letters.
        - name: Email professionnel
          regex: "\\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}\\b"
          score: 0.85
        # "IEC" (optionally preceded by "n°" or "numéro") followed by 6-8
        # digits, captured in group 1.
        - name: Numéro IEC
          regex: "\\b(?:n°\\s*IEC|numéro\\s*IEC|IEC)\\s*:?\\s*([0-9]{6,8})\\b"
          score: 0.9
        # Profession keywords.
        - name: Avocat
          regex: "\\b(?:avocat|avocate)\\b"
          score: 0.9
        - name: Expert-comptable
          regex: "\\b(?:expert-comptable|expert\\s+comptable)\\b"
          score: 0.99
        - name: Notaire
          regex: "\\b(?:notaire)\\b"
          score: 0.95
        - name: Médecin
          regex: "\\b(?:médecin|docteur\\s+en\\s+médecine)\\b"
          score: 0.95
        # Belgium-specific data.
        # "ONSS" followed by a 7-digit number.
        - name: Numéro ONSS employeur
          regex: "\\b(?:ONSS|onss)\\s*:?\\s*[0-9]{7}\\b"
          score: 0.95
        # "patronal" (optionally "numéro patronal") followed by a 7-digit
        # number.
        - name: Numéro patronal
          regex: "\\b(?:numéro\\s+)?patronal\\s*:?\\s*[0-9]{7}\\b"
          score: 0.9
      context:
        - "professionnel"
        - "travail"
        - "bureau"
        - "entreprise"
        - "poste"
        - "fonction"
        - "réglementé"
        - "ordre"
        - "diplôme"
        - "ONSS"
        - "patronal"
        - "employeur"
        - "siège social"
|
||||
20
conf/recognizers/Business/generic/trade_secrets.yaml
Normal file
20
conf/recognizers/Business/generic/trade_secrets.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Recognizer for trade secrets and internal projects.
recognizer_registry:
  recognizers:
    - name: TradeSecretsRecognizer
      supported_language: fr
      supported_entity: SECRET_COMMERCIAL
      patterns:
        # "projet" / "project" followed by a capitalised name of 4-16 letters.
        - name: Nom de projet interne
          regex: "\\b(?:projet|project)\\s+[A-Z][a-zA-Z]{3,15}\\b"
          score: 0.85
        # "projet" / "project", optional ":", then 2-4 letters, an optional
        # "-" or "/", and 2-4 digits.
        - name: Code projet
          regex: "\\b(?:projet|project)\\s*:?\\s*[A-Z]{2,4}[\\-/]?[0-9]{2,4}\\b"
          score: 0.9
        # "plan" / "stratégie", then "stratégique" / "business", then a
        # four-digit number.
        - name: Plan stratégique
          regex: "\\b(?:plan|stratégie)\\s+(?:stratégique|business)\\s+[0-9]{4}\\b"
          score: 0.9
        # "formule" / "recette" / "procédé" followed by a code that starts
        # with a capital letter and continues with 2-10 characters from
        # [-0-9A-Z].
        - name: Formule interne
          regex: "\\b(?:formule|recette|procédé)\\s+[A-Z][\\-0-9A-Z]{2,10}\\b"
          score: 0.85
      # Context words listed for this recognizer.
      context:
        - "projet"
        - "stratégique"
        - "confidentiel"
        - "interne"
        - "secret"
        - "propriétaire"
|
||||
Reference in New Issue
Block a user