presidio modulaire
This commit is contained in:
24
conf/recognizers/Business/belgian/enterprise_numbers.yaml
Normal file
24
conf/recognizers/Business/belgian/enterprise_numbers.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# Recognizer for Belgian enterprise numbers (BCE, VAT/TVA, ONSS, employer number).
#
# NOTE(review): the lookbehinds below contain `\s*`, i.e. they are
# variable-width. The stdlib `re` module rejects those, so this file presumably
# relies on the third-party `regex` engine - confirm against the loader.
recognizer_registry:
  recognizers:
    - name: BelgianEnterpriseRecognizer
      supported_language: fr
      supported_entity: BE_ENTERPRISE_NUMBER
      patterns:
        # Number introduced by an explicit "BCE:" label. The label sits in a
        # lookbehind, so only the number itself is part of the match.
        - name: Numéro BCE avec deux points
          regex: "(?<=\\bBCE\\s*:\\s*)((BE)?\\s?0\\d{3}[\\.\\s]?\\d{3}[\\.\\s]?\\d{3})\\b"
          score: 1.0

        # Number introduced by an explicit "TVA:" label. Separators are
        # optional (dot or whitespace), like in the BCE pattern above: the
        # general pattern below refuses anything directly after "TVA:", so a
        # dots-only rule here would leave "TVA: BE0123456789" unmatched by
        # every pattern in this recognizer.
        - name: Numéro TVA avec deux points
          regex: "(?<=\\bTVA\\s*:\\s*)(BE\\s?\\d{4}[\\.\\s]?\\d{3}[\\.\\s]?\\d{3})\\b"
          score: 1.0

        # Unlabelled number. The negative lookbehind skips numbers that
        # directly follow "BCE:" or "TVA:", which the two patterns above own.
        - name: Numéro d'entreprise général
          regex: "(?<!(?:BCE|TVA)\\s*:\\s*)\\b(BE)?\\s?0\\d{3}[\\.\\s]?\\d{3}[\\.\\s]?\\d{3}\\b"
          score: 0.9

        # "ONSS" keyword (optional colon) followed by exactly 7 digits; the
        # keyword is part of the match.
        - name: Numéro ONSS
          regex: "\\bONSS\\s*:?\\s*\\d{7}\\b"
          score: 0.95

        # "patronal" / "numéro patronal" (optional colon) followed by exactly
        # 7 digits; the keyword is part of the match.
        - name: Numéro patronal
          regex: "\\b(?:numéro\\s+)?patronal\\s*:?\\s*\\d{7}\\b"
          score: 0.9
      context:
        - "TVA"
        - "intracommunautaire"
        - "ONSS"
        - "entreprise"
        - "patronal"
|
||||
28
conf/recognizers/Business/belgian/organization_names.yaml
Normal file
28
conf/recognizers/Business/belgian/organization_names.yaml
Normal file
@@ -0,0 +1,28 @@
|
||||
# Recognizer for Belgian organization names.
#
# Every pattern ends with a negative lookahead that rejects a name directly
# followed by a parenthesis mentioning BCE or TVA.
#
# NOTE(review): `[A-Z]` only enforces a capital initial when the patterns are
# compiled case-sensitively. Presidio's default global regex flags include
# IGNORECASE - verify which flags the loader applies.
# NOTE(review): variable-width lookbehinds and atomic groups `(?>...)` are used
# below; both presumably rely on the third-party `regex` engine - confirm.
recognizer_registry:
  recognizers:
    - name: SmartOrganizationRecognizer
      supported_language: fr
      supported_entity: ORGANIZATION
      patterns:
        # Single-word names ending in a typical company suffix. The `\b` after
        # the name already pins the match to the end of the word, so the
        # lookahead cannot be bypassed by backtracking.
        - name: Noms entreprise avec suffixes
          regex: "\\b([A-Z][a-zA-Zà-ÿ]+(?:Consult|Tech|Soft|Digital|Solutions|Services|Group|Corp|Company|Systems|Data|Cloud|Web|Net|Info|Cyber|Smart|Pro|Expert|Plus|Max|Global|International|Europe|Belgium|Brussels|Wallonie|Flandre))\\b(?!\\s*\\([^)]*(?:BCE|TVA)[^)]*\\))"
          score: 0.9

        # Legal form followed by one or more capitalized words.
        # The name is wrapped in an atomic group: without it, when the
        # lookahead rejects "SA Dupont (BCE ...)", the engine backtracks and
        # reports the truncated name "SA Dupon" instead of no match.
        - name: Formes légales complètes
          regex: "\\b((?>(?:SPRL|SRL|SA|ASBL|SCS|SNC)\\s+[A-Z][a-zA-Zà-ÿ]+(?:\\s+[A-Z][a-zA-Zà-ÿ]+)*))(?!\\s*\\([^)]*(?:BCE|TVA)[^)]*\\))"
          score: 0.95

        # Capitalized words right after "société" or "entreprise" (kept for
        # the remaining cases). Atomic group for the same reason as above.
        - name: Noms avec contexte entreprise
          regex: "(?<=\\b(?:société|entreprise)\\s+)((?>[A-Z][a-zA-Zà-ÿ]+(?:\\s+[A-Z][a-zA-Zà-ÿ]+)*))(?!\\s*\\([^)]*(?:BCE|TVA)[^)]*\\))"
          score: 0.85

        # Capitalized words right after "gérant de la". Atomic group for the
        # same reason as above.
        - name: Noms après gérant
          regex: "(?<=gérant\\s+de\\s+la\\s+)((?>[A-Z][a-zA-Zà-ÿ]+(?:\\s+[A-Z][a-zA-Zà-ÿ]+)*))(?!\\s*\\([^)]*(?:BCE|TVA)[^)]*\\))"
          score: 0.8
      context:
        - "société"
        - "entreprise"
        - "gérant de la"
        - "administrateur"
|
||||
20
conf/recognizers/Business/belgian/professional_ids.yaml
Normal file
20
conf/recognizers/Business/belgian/professional_ids.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Recognizer for Belgian professional identifiers (IEC, lawyer and doctor
# registration numbers).
#
# NOTE(review): the lookbehinds below are variable-width (`\s*`), which the
# stdlib `re` module rejects; presumably the `regex` engine is used - confirm.
recognizer_registry:
  recognizers:
    - name: BelgianProfessionalIdRecognizer
      supported_language: fr
      supported_entity: BE_PROFESSIONAL_ID
      patterns:
        # Six digits introduced by an "IEC:" label; the label sits in a
        # lookbehind, so only the digits are matched.
        - name: Numéro IEC avec deux points
          regex: "(?<=\\bIEC\\s*:\\s*)\\d{6}\\b"
          score: 1.0

        # "IEC" (optionally preceded by "n°", optionally followed by a colon)
        # and six digits; the keyword is part of the match.
        # NOTE(review): the negative lookbehind is evaluated before the "IEC"
        # keyword, not before the digits, so "IEC: 123456" is still matched
        # here in addition to the labelled pattern above - confirm intent.
        - name: Numéro IEC général
          regex: "(?<!IEC\\s*:\\s*)\\b(?:n°\\s*)?IEC\\s*:?\\s*\\d{6}\\b"
          score: 0.9

        # "n°" followed by 4 to 6 digits, with an optional leading "avocat".
        # NOTE(review): because "avocat" is optional, any "n° 1234" matches -
        # confirm this breadth is intended.
        - name: Numéro d'avocat
          regex: "\\b(?:avocat\\s+)?n°\\s*\\d{4,6}\\b"
          score: 0.8

        # "Dr." or "médecin", then "n°" and 5 to 7 digits.
        - name: Numéro de médecin
          regex: "\\b(?:Dr\\.|médecin)\\s*n°\\s*\\d{5,7}\\b"
          score: 0.85
      context:
        - "expert-comptable"
        - "IEC"
        - "avocat"
        - "médecin"
        - "professionnel"
|
||||
17
conf/recognizers/Business/french/siret_siren.yaml
Normal file
17
conf/recognizers/Business/french/siret_siren.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# Recognizer for French SIRET / SIREN numbers.
# Both number kinds are reported under the single entity FR_SIRET.
recognizer_registry:
  recognizers:
    - name: FrenchSIRETRecognizer
      supported_language: fr
      supported_entity: FR_SIRET
      patterns:
        # 14 digits grouped 3-3-3-5, each separator being an optional
        # whitespace character.
        - name: SIRET complet
          regex: "\\b[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{5}\\b"
          score: 0.9

        # 9 digits grouped 3-3-3, each separator being an optional whitespace
        # character. This also matches the first nine digits of a
        # space-separated SIRET.
        - name: SIREN
          regex: "\\b[0-9]{3}\\s?[0-9]{3}\\s?[0-9]{3}\\b"
          score: 0.85

        # "SIRET" keyword (optional colon) followed by the 3-3-3-5 grouping
        # with mandatory single-whitespace separators; the keyword is part of
        # the match.
        - name: SIRET avec espaces
          regex: "\\bSIRET\\s*:?\\s*[0-9]{3}\\s[0-9]{3}\\s[0-9]{3}\\s[0-9]{5}\\b"
          score: 0.95
      context:
        - "SIRET"
        - "SIREN"
        - "établissement"
        - "entreprise"
        - "société"
|
||||
34
conf/recognizers/Business/generic/market_share.yaml
Normal file
34
conf/recognizers/Business/generic/market_share.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Recognizer for market-share statements in French text.
recognizer_registry:
  recognizers:
    - name: MarketShareRecognizer
      supported_language: fr
      supported_entity: MARKET_SHARE
      patterns:
        # Market percentages: optional verb, optional "environ", a percentage,
        # then a market-share phrase.
        # The percentage accepts "100" as well as one- or two-digit values
        # (with an optional decimal part), and one optional whitespace before
        # "%" so that the usual French spelling "35 %" is matched too.
        - name: Market Share Percentage
          regex: "\\b(?:détient|possède|contrôle|représente)?\\s*(?:environ\\s+)?(?:(?:100|\\d{1,2}(?:[,.]\\d{1,2})?)\\s?%)\\s*(?:de\\s+(?:part\\s+de\\s+)?marché|du\\s+marché|de\\s+parts?)\\b"
          score: 0.9

        # Market positions: leader, "numéro N", "Ner/Nème acteur", dominant
        # position or monopoly, followed by "marché" or "secteur".
        - name: Market Position
          regex: "\\b(?:leader|numéro\\s+\\d+|\\d+(?:er|ème)\\s+acteur|position\\s+dominante|monopole)\\s+(?:du\\s+)?(?:marché|secteur)\\b"
          score: 0.85

        # Relative shares: a qualifier followed by "marché" or
        # "part de marché".
        - name: Relative Market Share
          regex: "\\b(?:majoritaire|minoritaire|principale|significative)\\s+(?:part\\s+de\\s+)?marché\\b"
          score: 0.8

        # Concentration wording: concentration / consolidation / fusion of
        # the market.
        - name: Market Concentration
          regex: "\\b(?:concentration|consolidation|fusion)\\s+(?:du\\s+)?marché\\b"
          score: 0.75

        # Relative revenue: a percentage of the market's or sector's revenue.
        # Same percentage rule as "Market Share Percentage" above.
        - name: Revenue Share
          regex: "\\b(?:(?:100|\\d{1,2}(?:[,.]\\d{1,2})?)\\s?%)\\s*(?:du\\s+)?(?:chiffre\\s+d'affaires|CA|revenus?)\\s+(?:du\\s+)?(?:marché|secteur)\\b"
          score: 0.85
      context:
        - "part de marché"
        - "position concurrentielle"
        - "leader"
        - "concurrent"
        - "secteur"
        - "industrie"
        - "chiffre d'affaires"
        - "revenus"
        - "concentration"
        - "monopole"
        - "oligopole"
|
||||
Reference in New Issue
Block a user