Actualiser app.py
This commit is contained in:
24
app.py
24
app.py
@@ -23,29 +23,7 @@ except Exception as e:
|
|||||||
logger.exception("Error during AnalyzerEngine initialization.")
|
logger.exception("Error during AnalyzerEngine initialization.")
|
||||||
analyzer = None
|
analyzer = None
|
||||||
|
|
||||||
# Regex strict pour IBAN belge format attendu
|
|
||||||
IBAN_REGEX = re.compile(r"\b[A-Z]{2}[0-9]{2}(?:\s[0-9]{4}){3}\b", re.IGNORECASE)
|
|
||||||
|
|
||||||
# Regex IPv4
|
|
||||||
IPV4_REGEX = re.compile(
|
|
||||||
r"\b(?:(?:25[0-5]|2[0-4][0-9]|1\d{2}|[1-9]?\d)\.){3}"
|
|
||||||
r"(?:25[0-5]|2[0-4][0-9]|1\d{2}|[1-9]?\d)\b"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Liste des labels/phrases à exclure d’anonymisation (en minuscules)
|
|
||||||
IGNORE_LABELS = {
|
|
||||||
"témoins",
|
|
||||||
"témoins clés",
|
|
||||||
"coordonnées",
|
|
||||||
"coordonnées bancaires",
|
|
||||||
"contexte financier",
|
|
||||||
"données sensibles",
|
|
||||||
"contexte",
|
|
||||||
"montrent",
|
|
||||||
"montrent des",
|
|
||||||
"montrent des irrégularités",
|
|
||||||
"bénéficiaire",
|
|
||||||
}
|
|
||||||
|
|
||||||
def normalize_label(text: str) -> str:
|
def normalize_label(text: str) -> str:
|
||||||
return text.strip().lower()
|
return text.strip().lower()
|
||||||
@@ -91,7 +69,7 @@ def analyze_text():
|
|||||||
logger.warning(f"Invalid IBAN detected, skipping: '{ent_text}'")
|
logger.warning(f"Invalid IBAN detected, skipping: '{ent_text}'")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Recadrage IP_ADDRESS strict IPv4 (wildcard possible pour IPv6 si besoin)
|
# Recadrage IP_ADDRESS strict IPv4
|
||||||
if res.entity_type == "IP_ADDRESS":
|
if res.entity_type == "IP_ADDRESS":
|
||||||
match = IPV4_REGEX.search(ent_text)
|
match = IPV4_REGEX.search(ent_text)
|
||||||
if match:
|
if match:
|
||||||
|
|||||||
Reference in New Issue
Block a user