// Client-side hook that submits text or an uploaded file to /api/process-document
// and builds the list of entity mappings from the response.
import { useState } from "react";
import { patterns } from "@/app/utils/highlightEntities";
/**
 * One row of the "original value -> anonymized label" table built after a
 * successful anonymization request.
 */
interface EntityMapping {
  /** Slice of the submitted text between `startIndex` and `endIndex`. */
  originalValue: string;
  /** Display label for the replacement, formatted as `<label> [<counter>]`. */
  anonymizedValue: string;
  /** Human-readable entity label, or the raw analyzer entity type when no label is known. */
  entityType: string;
  /** Start offset of the entity as reported by the analyzer. */
  startIndex: number;
  /** End offset of the entity as reported by the analyzer (used as an exclusive bound). */
  endIndex: number;
}
/**
 * A single detection returned by the API. The API returns these objects with
 * snake_case keys, so the property names intentionally break the camelCase
 * convention used elsewhere in this file.
 */
interface PresidioAnalyzerResult {
  /** Analyzer entity type; the anonymized text marks it as `<entity_type>`. */
  entity_type: string;
  /** Start offset of the detection in the analyzed text. */
  start: number;
  /** End offset of the detection (used as the exclusive bound of `substring`). */
  end: number;
  /** Score attached to the detection — presumably the analyzer's confidence; not read in this file. */
  score: number;
}
/**
 * JSON body returned by `/api/process-document`. The top-level keys use
 * camelCase (unlike the nested analyzer results, which are snake_case).
 * Every field is optional because the shape differs between the success,
 * fallback and error cases.
 */
interface ProcessDocumentResponse {
  /** Original text, returned when the service falls back without anonymizing. */
  text?: string;
  /** Text in which detected entities were replaced by `<ENTITY_TYPE>` tags. */
  anonymizedText?: string;
  /** Detections that accompany `anonymizedText`. */
  analyzerResults?: PresidioAnalyzerResult[];
  /** Error message; when present the request is treated as failed. */
  error?: string;
}
/** Inputs and state setters the `useAnonymization` hook needs from its caller. */
interface AnonymizationLogicProps {
  /** Text typed by the user; takes precedence over `fileContent` when non-empty. */
  sourceText: string;
  /** Text content of the uploaded file, used when `sourceText` is empty. */
  fileContent: string;
  /** Uploaded file; when set it is sent to the API as-is instead of a generated text file. */
  uploadedFile: File | null;
  /** Receives the anonymized text (or the original text in the fallback case). */
  setOutputText: (text: string) => void;
  /** Receives a user-facing error message, or `null` to clear the previous one. */
  setError: (error: string | null) => void;
  /** Receives the entity mappings derived from the API response. */
  setEntityMappings: (mappings: EntityMapping[]) => void;
}
/**
 * Counts the non-overlapping literal occurrences of `needle` in `haystack`.
 * A plain string split is used instead of a dynamically built RegExp so that
 * entity types containing regex metacharacters are counted literally and can
 * never raise a "invalid regular expression" error.
 */
const countOccurrences = (haystack: string, needle: string): number =>
  needle === "" ? 0 : haystack.split(needle).length - 1;

/**
 * Builds a lookup from analyzer entity type (e.g. `PERSON`) to the display
 * label declared in `patterns`. The entity type is recovered from the
 * `<ENTITY_TYPE>` tag embedded in each pattern's regex.
 */
const buildEntityLabelMap = (): Map<string, string> => {
  const labels = new Map<string, string>();
  patterns.forEach((p) => {
    const match = p.regex.toString().match(/<([A-Z_]+)>/);
    if (match && match[1]) {
      labels.set(match[1], p.label);
    }
  });
  return labels;
};

/**
 * Derives the unique entity mappings to display from an API response.
 *
 * @param originalText - Text the offsets in `analyzerResults` are applied to.
 * @param anonymizedText - Anonymized text containing `<ENTITY_TYPE>` tags.
 * @param analyzerResults - Detections returned by the API; not mutated.
 * @returns Mappings in order of appearance, de-duplicated by label and original value.
 */
const buildEntityMappings = (
  originalText: string,
  anonymizedText: string,
  analyzerResults: readonly PresidioAnalyzerResult[]
): EntityMapping[] => {
  const labels = buildEntityLabelMap();

  // 1. Count how many times each entity tag actually appears in the
  //    anonymized text; this caps how many mappings a type may contribute.
  const tagCounts = new Map<string, number>();
  for (const { entity_type: entityType } of analyzerResults) {
    if (!tagCounts.has(entityType)) {
      tagCounts.set(
        entityType,
        countOccurrences(anonymizedText, `<${entityType}>`)
      );
    }
  }

  const seen = new Set<string>();
  const addedCounts = new Map<string, number>();
  const mappings: EntityMapping[] = [];

  // 2. Walk the detections in order of appearance. The array is copied first
  //    because Array.prototype.sort sorts in place and the response object
  //    should not be mutated.
  const ordered = [...analyzerResults].sort((a, b) => a.start - b.start);
  for (const result of ordered) {
    const entityType = result.entity_type;
    const maxCount = tagCounts.get(entityType) ?? 0;
    const currentCount = addedCounts.get(entityType) ?? 0;

    // Skip detections whose type is not (or no longer) present as a tag.
    if (currentCount >= maxCount) continue;

    const originalValue = originalText.substring(result.start, result.end);
    // `||` (not `??`) so an empty label also falls back to the raw type.
    const label = labels.get(entityType) || entityType;
    const uniqueKey = `${label}|${originalValue}`;
    if (seen.has(uniqueKey)) continue;

    const newCount = currentCount + 1;
    addedCounts.set(entityType, newCount);
    mappings.push({
      entityType: label,
      originalValue,
      anonymizedValue: `${label} [${newCount}]`,
      startIndex: result.start,
      endIndex: result.end,
    });
    seen.add(uniqueKey);
  }

  return mappings;
};

/**
 * React hook exposing `anonymizeData`, which posts the current text (or the
 * uploaded file) to `/api/process-document` and pushes the result into the
 * caller-provided setters, plus an `isProcessing` flag for the request.
 *
 * @returns `anonymizeData` to trigger a request and `isProcessing`, which is
 *   `true` while a request is in flight.
 */
export const useAnonymization = ({
  sourceText,
  fileContent,
  uploadedFile,
  setOutputText,
  setError,
  setEntityMappings,
}: AnonymizationLogicProps) => {
  const [isProcessing, setIsProcessing] = useState(false);

  /**
   * Runs one anonymization request. Never rejects: every failure is reported
   * through `setError`.
   */
  const anonymizeData = async (): Promise<void> => {
    const textToProcess = sourceText || fileContent || "";

    if (!textToProcess.trim()) {
      setError(
        "Veuillez saisir du texte à anonymiser ou télécharger un fichier"
      );
      return;
    }

    // Reset everything a previous run may have left behind.
    setIsProcessing(true);
    setError(null);
    setOutputText("");
    setEntityMappings([]);

    try {
      // The endpoint always receives a file: either the uploaded one or the
      // typed text wrapped in an in-memory text file.
      const formData = new FormData();
      if (uploadedFile) {
        // NOTE(review): the server analyzes this file, while original values
        // below are sliced from `textToProcess`; this assumes both hold the
        // same text — confirm against the caller.
        formData.append("file", uploadedFile);
      } else {
        const textBlob = new Blob([textToProcess], { type: "text/plain" });
        const textFile = new File([textBlob], "input.txt", {
          type: "text/plain",
        });
        formData.append("file", textFile);
      }

      const response = await fetch("/api/process-document", {
        method: "POST",
        body: formData,
      });

      if (!response.ok) {
        let errorMessage = `Erreur HTTP: ${response.status}`;
        try {
          const errorData = await response.json();
          if (errorData.error) errorMessage = errorData.error;
        } catch {
          // Best effort: a non-JSON error body keeps the generic HTTP message.
        }
        throw new Error(errorMessage);
      }

      const data: ProcessDocumentResponse = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      // The main response keys are camelCase.
      if (data.anonymizedText && data.analyzerResults) {
        setOutputText(data.anonymizedText);
        setEntityMappings(
          buildEntityMappings(
            textToProcess,
            data.anonymizedText,
            data.analyzerResults
          )
        );
      } else if (data.text) {
        // Fallback: the service returned the untouched text.
        setOutputText(data.text);
        setError("Presidio temporairement indisponible. Texte non anonymisé.");
      } else {
        // Neither a result nor a fallback: report it instead of silently
        // leaving the output empty.
        setError("Réponse inattendue du service d'anonymisation");
      }
    } catch (error) {
      setError(
        error instanceof Error
          ? error.message
          : "Erreur lors de l'anonymisation avec Presidio"
      );
    } finally {
      setIsProcessing(false);
    }
  };

  return { anonymizeData, isProcessing };
};