new full
This commit is contained in:
@@ -2,6 +2,7 @@ from typing import List
|
||||
from presidio_analyzer import RecognizerResult
|
||||
from entity_refiners import EntityRefinerManager
|
||||
from post_processors import DeduplicationProcessor, OverlapResolver
|
||||
from post_processors.cleanup_processor import CleanupProcessor
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -9,9 +10,10 @@ logger = logging.getLogger(__name__)
|
||||
class AnalysisPipeline:
|
||||
def __init__(self):
|
||||
self.refiner_manager = EntityRefinerManager()
|
||||
self.cleanup_processor = CleanupProcessor()
|
||||
self.overlap_resolver = OverlapResolver()
|
||||
self.deduplicator = DeduplicationProcessor()
|
||||
logger.info("🚀 Pipeline d'analyse initialisé")
|
||||
logger.info("🚀 Pipeline d'analyse initialisé avec nettoyage avancé")
|
||||
|
||||
def process(self, text: str, results: List[RecognizerResult], allow_list_terms: List[str]) -> List[RecognizerResult]:
|
||||
"""Traite les résultats à travers le pipeline complet"""
|
||||
@@ -38,10 +40,13 @@ class AnalysisPipeline:
|
||||
)
|
||||
refined_results.append(refined_result)
|
||||
|
||||
# 3. Résolution des chevauchements
|
||||
resolved_results = self.overlap_resolver.process(refined_results, text)
|
||||
# 3. Nettoyage avancé des résultats
|
||||
cleaned_results = self.cleanup_processor.process(refined_results)
|
||||
|
||||
# 4. Déduplication
|
||||
# 4. Résolution des chevauchements
|
||||
resolved_results = self.overlap_resolver.process(cleaned_results, text)
|
||||
|
||||
# 5. Déduplication
|
||||
final_results = self.deduplicator.process(resolved_results, text)
|
||||
|
||||
logger.info(f"🎯 Pipeline complet: {len(results)} -> {len(final_results)} entités")
|
||||
|
||||
Reference in New Issue
Block a user