This commit is contained in:
Biqoz
2025-09-15 19:04:38 +02:00
parent 3e70181b58
commit 3f9c007c9e
46 changed files with 919 additions and 146 deletions

View File

@@ -2,6 +2,7 @@ from typing import List
from presidio_analyzer import RecognizerResult
from entity_refiners import EntityRefinerManager
from post_processors import DeduplicationProcessor, OverlapResolver
from post_processors.cleanup_processor import CleanupProcessor
import logging
logger = logging.getLogger(__name__)
@@ -9,9 +10,10 @@ logger = logging.getLogger(__name__)
class AnalysisPipeline:
# Build the pipeline stages: one refiner manager, one cleanup processor,
# one overlap resolver and one deduplicator, each stored on self.
def __init__(self):
self.refiner_manager = EntityRefinerManager()
# Cleanup stage; process() calls it on the refined results before overlap resolution.
self.cleanup_processor = CleanupProcessor()
self.overlap_resolver = OverlapResolver()
self.deduplicator = DeduplicationProcessor()
# NOTE(review): this listing looks like a diff rendered without +/- markers, so the
# two log calls below are presumably the old and new versions of a single line,
# not two consecutive statements. Confirm against the real file.
logger.info("🚀 Pipeline d'analyse initialisé")
logger.info("🚀 Pipeline d'analyse initialisé avec nettoyage avancé")
def process(self, text: str, results: List[RecognizerResult], allow_list_terms: List[str]) -> List[RecognizerResult]:
"""Traite les résultats à travers le pipeline complet"""
@@ -38,10 +40,13 @@ class AnalysisPipeline:
)
refined_results.append(refined_result)
# 3. Résolution des chevauchements
resolved_results = self.overlap_resolver.process(refined_results, text)
# 3. Nettoyage avancé des résultats
cleaned_results = self.cleanup_processor.process(refined_results)
# 4. Déduplication
# 4. Résolution des chevauchements
resolved_results = self.overlap_resolver.process(cleaned_results, text)
# 5. Déduplication
final_results = self.deduplicator.process(resolved_results, text)
logger.info(f"🎯 Pipeline complet: {len(results)} -> {len(final_results)} entités")