This commit is contained in:
Biqoz
2025-09-15 19:04:38 +02:00
parent 3e70181b58
commit 3f9c007c9e
46 changed files with 919 additions and 146 deletions

View File

@@ -20,14 +20,26 @@ class OverlapResolver:
'BE_ENTERPRISE_NUMBER': 88,
'PHONE_NUMBER': 85,
'BE_PHONE_NUMBER': 85,
'TELEPHONE': 84,
'TELEPHONE_FRANCAIS': 86,
'IP_ADDRESS': 82,
'ADRESSE_FRANCAISE': 78, # Priorité plus élevée pour adresses françaises spécifiques
'BE_ADDRESS': 75,
'FR_ADDRESS': 75,
'ORGANIZATION': 65,
'LOCATION': 60,
'ADRESSE': 70, # Adresse générique avec priorité plus faible
'ORGANISATION': 65,
'LOCATION': 60, # Priorité plus faible que les adresses
'PERSON': 50,
'PERSON_NAME': 45,
'NRP': 40,
'URL': 35
'BE_PROFESSIONAL_ID': 40,
'FR_CIVILITY_TITLE': 85,
'FR_REGULATED_PROFESSION': 80,
'CARTE_IDENTITE_FRANCAISE': 78,
'PERMIS_CONDUIRE_FRANCAIS': 76,
'PASSEPORT_FRANCAIS': 77,
'URL': 35,
'MARKET_SHARE': 35
}
# Patterns pour identifier les organisations
@@ -112,12 +124,12 @@ class OverlapResolver:
# Correction 1: PERSON -> ORGANISATION pour les noms d'entreprise
if result.entity_type == 'PERSON' and self._is_organization_name(entity_text):
corrected_result = RecognizerResult(
entity_type='ORGANIZATION',
entity_type='ORGANISATION',
start=result.start,
end=result.end,
score=result.score + 0.1 # Bonus de confiance
)
logger.debug(f"🔄 Correction PERSON -> ORGANIZATION: '{entity_text}'")
logger.debug(f"🔄 Correction PERSON -> ORGANISATION: '{entity_text}'")
corrected_results.append(corrected_result)
# Correction 2: Séparer IP des adresses physiques