From f3b06fa2ee7a72f2a8d071f170ea47de37397002 Mon Sep 17 00:00:00 2001
From: nacim <moudjeb.nacim@gmail.com>
Date: Mon, 28 Jul 2025 18:01:05 +0000
Subject: [PATCH] Actualiser app.py

---
 app.py | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/app.py b/app.py
index 41acf16..c0eb9cf 100644
--- a/app.py
+++ b/app.py
@@ -5,14 +5,14 @@ from flask import Flask, request, jsonify, make_response
 
 from presidio_analyzer import AnalyzerEngineProvider
 
-# Config du logging
+# Configuration du logging
 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 app = Flask(__name__)
 
-# Initialisation Presidio Analyzer via Provider
+# Initialisation du moteur Presidio via Provider
 analyzer = None
 try:
     logger.info("--- Presidio Analyzer Service Starting ---")
@@ -24,12 +24,10 @@ except Exception as e:
     logger.exception("Error during AnalyzerEngine initialization.")
     analyzer = None
 
-
-# Regex IBAN strict : 2 lettres, 2 chiffres, puis 4 groupes de 4 alphanum (minimum),
-# avec optional whitespace between groups
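+# Strict IBAN regex: 2 letters, 2 digits, then 4 to 7 groups of exactly 4 alphanumerics, each optionally preceded by one whitespace character (case-insensitive). NOTE(review): a trailing group shorter than 4 characters cannot match — confirm this is acceptable for all supported countries.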
 IBAN_REGEX = re.compile(r"\b[A-Z]{2}[0-9]{2}(?:\s?[A-Z0-9]{4}){4,7}\b", re.IGNORECASE)
 
-# Labels / titres à exclure de l'anonymisation (en minuscules pour normalisation)
+# Labels / titres à exclure de l'anonymisation (en minuscules pour comparaison simple)
 IGNORE_LABELS = {
     "témoins",
     "témoins clés",
@@ -38,13 +36,12 @@ IGNORE_LABELS = {
     "contexte financier",
     "données sensibles",
     "contexte",
-    # ajouter d'autres labels ici si besoin
+    # Ajoute d'autres labels si besoin
 }
 
 def normalize_label(txt):
     return txt.strip().lower()
 
-
 @app.route('/analyze', methods=['POST'])
 def analyze_text():
     if not analyzer:
@@ -68,37 +65,36 @@ def analyze_text():
             ent_text = text_to_analyze[res.start:res.end].strip()
             ent_text_norm = normalize_label(ent_text)
 
-            # 1. Ignorer les entités correspondant exactement aux labels/titres à préserver
+            # 1. Ignorer les entités correspondant aux labels/titres à préserver
             if ent_text_norm in IGNORE_LABELS:
-                logger.debug(f"Skip anonymizing label: '{ent_text}'")
+                logger.debug(f"Skipping anonymization of label: '{ent_text}'")
                 continue
 
-            # 2. Si entité de type IBAN, resserrer la sélection strictement au format IBAN
+            # 2. Si entité de type IBAN, recadrer strictement sur le match IBAN
             if res.entity_type == "IBAN":
                 match = IBAN_REGEX.search(ent_text)
                 if match:
                     true_iban = match.group(0)
-                    # Recalcule start/end dans le texte original
                     start_offset = ent_text.find(true_iban)
                     if start_offset != -1:
-                        old_start = res.start
-                        old_end = res.end
+                        old_start, old_end = res.start, res.end
                         res.start += start_offset
                         res.end = res.start + len(true_iban)
                         ent_text = true_iban
-                        logger.debug(f"Correct IBAN span from ({old_start}-{old_end}) to ({res.start}-{res.end}): '{ent_text}'")
+                        logger.debug(
+                            f"Adjusted IBAN span from ({old_start}-{old_end}) to ({res.start}-{res.end}): '{ent_text}'"
+                        )
                     else:
-                        # Aucun start trouvé (peu probable), garder tel quel
-                        logger.warning(f"Could not find exact IBAN substring in entity text: '{ent_text}'")
+                        logger.warning(f"Cannot find IBAN substring inside entity text: '{ent_text}'")
                 else:
-                    # Pas de correspondance valide au regex IBAN, on peut choisir d'ignorer ou garder tel quel
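+                    # No IBAN regex match: the entity is dropped from the results. NOTE(review): IBAN_REGEX only accepts whole groups of 4, so a valid unspaced IBAN whose length is not a multiple of 4 (e.g. a 27-character French IBAN) lands here and is returned un-anonymized — confirm this is intended.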
                     logger.warning(f"Entity IBAN does not match IBAN regex: '{ent_text}'")
+                    continue  # Ne pas ajouter l'entité
 
             filtered_results.append(res)
 
+        # Conversion en dict JSON pour renvoyer la réponse
         response_data = [res.to_dict() for res in filtered_results]
-
-        # **Important :** retourne uniquement les entités à anonymiser (sans labels exclus)
         return make_response(jsonify(response_data), 200)
 
     except Exception as e: