finalise
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { useState } from "react";
|
||||
import { patterns } from "@/app/utils/highlightEntities";
|
||||
|
||||
interface EntityMapping {
|
||||
originalValue: string;
|
||||
@@ -8,25 +9,18 @@ interface EntityMapping {
|
||||
endIndex: number;
|
||||
}
|
||||
|
||||
/**
 * One entity detection as returned by the Presidio analyzer.
 *
 * The API returns these objects with snake_case keys, so the property names
 * below intentionally do not follow the camelCase convention.
 */
interface PresidioAnalyzerResult {
  /** Entity type identifier; used to build `<ENTITY_TYPE>` tags elsewhere in this file. */
  entity_type: string;
  /** Start offset of the detected span (passed as the first argument to `substring`). */
  start: number;
  /** End offset of the detected span (passed as the second argument to `substring`). */
  end: number;
  /** Score attached to the detection — presumably a confidence value; confirm against the API. */
  score: number;
  /** Optional details about how the detection was produced. */
  analysis_explanation?: {
    recognizer: string;
    pattern_name?: string;
    pattern?: string;
    validation_result?: boolean;
  };
}
|
||||
|
||||
/**
 * Shape of the JSON body returned by the document-processing API.
 *
 * The top-level keys use camelCase. Every field is optional, so callers must
 * check which ones are present before using them.
 */
interface ProcessDocumentResponse {
  /** Original text, returned in the fallback case. */
  text?: string;
  /** Anonymized version of the text. */
  anonymizedText?: string;
  /** Presumably the number of PII items found; confirm against the API. */
  piiCount?: number;
  /** Individual detections reported by the analyzer. */
  analyzerResults?: PresidioAnalyzerResult[];
  /** Error message reported by the API. */
  error?: string;
}
|
||||
@@ -66,101 +60,105 @@ export const useAnonymization = ({
|
||||
setEntityMappings([]);
|
||||
|
||||
try {
|
||||
console.log("🚀 Début anonymisation avec Presidio");
|
||||
|
||||
const formData = new FormData();
|
||||
|
||||
if (uploadedFile) {
|
||||
console.log("📁 Traitement fichier:", {
|
||||
name: uploadedFile.name,
|
||||
type: uploadedFile.type,
|
||||
size: uploadedFile.size
|
||||
});
|
||||
formData.append("file", uploadedFile);
|
||||
} else {
|
||||
console.log("📝 Traitement texte saisi");
|
||||
const textBlob = new Blob([textToProcess], { type: "text/plain" });
|
||||
const textFile = new File([textBlob], "input.txt", { type: "text/plain" });
|
||||
const textFile = new File([textBlob], "input.txt", {
|
||||
type: "text/plain",
|
||||
});
|
||||
formData.append("file", textFile);
|
||||
}
|
||||
|
||||
console.log("🔍 Appel à /api/process-document avec Presidio...");
|
||||
console.log("📦 FormData préparée:", Array.from(formData.entries()));
|
||||
|
||||
const response = await fetch("/api/process-document", {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
|
||||
console.log("📡 Réponse reçue:", {
|
||||
ok: response.ok,
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
headers: Object.fromEntries(response.headers.entries())
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
let errorMessage = `Erreur HTTP: ${response.status}`;
|
||||
|
||||
try {
|
||||
const responseText = await response.text();
|
||||
console.log("📄 Contenu de l'erreur:", responseText);
|
||||
|
||||
if (responseText.trim()) {
|
||||
try {
|
||||
const errorData = JSON.parse(responseText);
|
||||
if (errorData.error) {
|
||||
errorMessage = errorData.error;
|
||||
console.log("✅ Message détaillé récupéré:", errorMessage);
|
||||
}
|
||||
} catch (jsonError) {
|
||||
console.error("❌ Erreur parsing JSON:", jsonError); // ✅ Utiliser la variable
|
||||
console.error("❌ Réponse non-JSON:", responseText);
|
||||
errorMessage = `Erreur ${response.status}: Réponse invalide du serveur`;
|
||||
}
|
||||
}
|
||||
} catch (readError) {
|
||||
console.error("❌ Impossible de lire la réponse:", readError);
|
||||
const errorData = await response.json();
|
||||
if (errorData.error) errorMessage = errorData.error;
|
||||
} catch {
|
||||
/* Ignore */
|
||||
}
|
||||
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
|
||||
const data: ProcessDocumentResponse = await response.json();
|
||||
console.log("📊 Réponse API:", data);
|
||||
|
||||
if (data.error) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
if (data.anonymizedText) {
|
||||
console.log("✅ Anonymisation réussie avec Presidio");
|
||||
// Utiliser camelCase pour les propriétés de la réponse principale
|
||||
if (data.anonymizedText && data.analyzerResults) {
|
||||
setOutputText(data.anonymizedText);
|
||||
|
||||
// Extraire les mappings depuis les résultats Presidio (plus d'erreur 'any')
|
||||
if (data.analyzerResults && data.text) {
|
||||
const mappings: EntityMapping[] = data.analyzerResults.map(
|
||||
(entity: PresidioAnalyzerResult, index: number) => ({
|
||||
originalValue: data.text!.substring(entity.start, entity.end),
|
||||
anonymizedValue: `[${entity.entity_type}${index + 1}]`,
|
||||
entityType: entity.entity_type,
|
||||
startIndex: entity.start,
|
||||
endIndex: entity.end,
|
||||
})
|
||||
);
|
||||
setEntityMappings(mappings);
|
||||
console.log("📋 Entités détectées:", mappings.length);
|
||||
console.log("🔍 Détails des entités:", mappings);
|
||||
}
|
||||
const entityTypeMap = new Map<string, string>();
|
||||
patterns.forEach((p) => {
|
||||
const match = p.regex.toString().match(/<([A-Z_]+)>/);
|
||||
if (match && match[1]) {
|
||||
entityTypeMap.set(match[1], p.label);
|
||||
}
|
||||
});
|
||||
|
||||
// 1. Compter les occurrences de chaque tag d'entité dans le texte anonymisé
|
||||
const tagCounts = new Map<string, number>();
|
||||
data.analyzerResults.forEach((result) => {
|
||||
const tag = `<${result.entity_type}>`;
|
||||
if (!tagCounts.has(result.entity_type)) {
|
||||
const count = (
|
||||
data.anonymizedText?.match(new RegExp(tag, "g")) || []
|
||||
).length;
|
||||
tagCounts.set(result.entity_type, count);
|
||||
}
|
||||
});
|
||||
|
||||
const seen = new Set<string>();
|
||||
const uniqueMappings: EntityMapping[] = [];
|
||||
const addedCounts = new Map<string, number>();
|
||||
|
||||
// 2. N'ajouter que les entités réellement anonymisées avec un compteur
|
||||
data.analyzerResults
|
||||
.sort((a, b) => a.start - b.start) // Trier par ordre d'apparition
|
||||
.forEach((result) => {
|
||||
const entityType = result.entity_type;
|
||||
const maxCount = tagCounts.get(entityType) || 0;
|
||||
const currentCount = addedCounts.get(entityType) || 0;
|
||||
|
||||
if (currentCount < maxCount) {
|
||||
const originalValue = textToProcess.substring(
|
||||
result.start,
|
||||
result.end
|
||||
);
|
||||
const frenchLabel = entityTypeMap.get(entityType) || entityType;
|
||||
const uniqueKey = `${frenchLabel}|${originalValue}`;
|
||||
|
||||
if (!seen.has(uniqueKey)) {
|
||||
const newCount = (addedCounts.get(entityType) || 0) + 1;
|
||||
addedCounts.set(entityType, newCount);
|
||||
|
||||
uniqueMappings.push({
|
||||
entityType: frenchLabel,
|
||||
originalValue: originalValue,
|
||||
anonymizedValue: `${frenchLabel} [${newCount}]`,
|
||||
startIndex: result.start,
|
||||
endIndex: result.end,
|
||||
});
|
||||
seen.add(uniqueKey);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
setEntityMappings(uniqueMappings);
|
||||
} else if (data.text) {
|
||||
console.log(
|
||||
"⚠️ Fallback: Presidio non disponible, texte original retourné"
|
||||
);
|
||||
setOutputText(data.text);
|
||||
setError("Presidio temporairement indisponible. Texte non anonymisé.");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("❌ Erreur anonymisation complète:", error);
|
||||
setError(
|
||||
error instanceof Error
|
||||
? error.message
|
||||
|
||||
Reference in New Issue
Block a user