diff --git a/conf/default.yaml b/conf/default.yaml index 77459fd..f655d79 100644 --- a/conf/default.yaml +++ b/conf/default.yaml @@ -1,13 +1,61 @@ -# ===================================================================== -# CONFIGURATION PRESIDIO POUR DOCUMENTS FRANÇAIS/BELGES -# ===================================================================== +# Configuration NLP unifiée +nlp_configuration: + nlp_engine_name: spacy + models: + - lang_code: en + model_name: en_core_web_lg + - lang_code: fr + model_name: fr_core_news_sm -# Configuration du moteur de langage (NLP) -nlp_engine_name: spacy -supported_languages: [en, fr] -models: - - lang_code: en - model_name: en_core_web_lg - - lang_code: fr - model_name: fr_core_news_sm + ner_model_configuration: + model_to_presidio_entity_mapping: + PER: PERSON + PERSON: PERSON + ORG: ORGANIZATION + ORGANIZATION: ORGANIZATION + LOC: LOCATION + LOCATION: LOCATION + GPE: LOCATION + MISC: ORGANIZATION + confidence_threshold: + default: 0.35 + EMAIL_ADDRESS: 0.4 + PHONE_NUMBER: 0.5 + PERSON: 0.6 + + labels_to_ignore: + - MISC + - CARDINAL + - EVENT + - LANGUAGE + - LAW + - MONEY + - ORDINAL + - PERCENT + - PRODUCT + - QUANTITY + - WORK_OF_ART + +# Configuration des langues supportées +supported_languages: + - en + - fr + +# Configuration du registre +recognizer_registry: + - default + +# Liste d'exclusion pour éviter les faux positifs +allow_list: + - Contrat + - Document + - Société + - Montant + - Partie + - Annexe + - Euro + - EUR + - Taux + - Valeur + - Prix