Update default.yaml

This commit is contained in:
Nacim
2025-06-23 13:09:24 +02:00
committed by GitHub
parent b6e4c1a8e9
commit 69affabf18

View File

@@ -1,13 +1,61 @@
# =====================================================================
# CONFIGURATION PRESIDIO POUR DOCUMENTS FRANÇAIS/BELGES
# =====================================================================
# Unified NLP configuration: engine name plus one model per language.
nlp_configuration:
  nlp_engine_name: spacy
  models:
    - lang_code: en
      model_name: en_core_web_lg
    - lang_code: fr
      model_name: fr_core_news_sm
# NLP engine configuration.
# `supported_languages` is intentionally not repeated here: it is declared
# once in its own section of this file, and a second top-level declaration
# would be a duplicate key (silently last-wins in lenient parsers, an error
# in strict ones).
nlp_engine_name: spacy
models:
  - lang_code: en
    model_name: en_core_web_lg
  - lang_code: fr
    model_name: fr_core_news_sm
ner_model_configuration:
  # Maps the labels emitted by the NER models to Presidio entity names.
  model_to_presidio_entity_mapping:
    PER: PERSON
    PERSON: PERSON
    ORG: ORGANIZATION
    ORGANIZATION: ORGANIZATION
    LOC: LOCATION
    LOCATION: LOCATION
    GPE: LOCATION
    # NOTE(review): MISC is also listed under labels_to_ignore below, so this
    # mapping presumably never takes effect - confirm which one is intended.
    MISC: ORGANIZATION
  # Per-entity score thresholds, with a fallback under `default`.
  # NOTE(review): confirm that the Presidio version in use actually consumes
  # this key inside ner_model_configuration.
  confidence_threshold:
    default: 0.35
    EMAIL_ADDRESS: 0.4
    PHONE_NUMBER: 0.5
    PERSON: 0.6
  # Model labels that are discarded instead of being reported as entities.
  labels_to_ignore:
    - MISC
    - CARDINAL
    - EVENT
    - LANGUAGE
    - LAW
    - MONEY
    - ORDINAL
    - PERCENT
    - PRODUCT
    - QUANTITY
    - WORK_OF_ART
# Supported languages
supported_languages: [en, fr]
# Recognizer registry configuration.
# NOTE(review): presumably selects the default recognizer set - confirm that
# the consumer accepts a plain list here rather than a mapping.
recognizer_registry:
  - default
# Allow list: terms excluded from detection to avoid false positives.
allow_list:
  - Contrat
  - Document
  - Société
  - Montant
  - Partie
  - Annexe
  - Euro
  - EUR
  - Taux
  - Valeur
  - Prix