presidio ok v.1
This commit is contained in:
@@ -17,34 +17,75 @@ export const AnonymizationInterface = ({
|
||||
|
||||
const anonymizedTypes = new Set<string>();
|
||||
|
||||
// Détecter les patterns d'anonymisation dans le texte de sortie
|
||||
// ✅ NOUVEAUX PATTERNS PRESIDIO
|
||||
|
||||
// Noms (PERSON)
|
||||
if (outputText.includes("<PERSON>")) {
|
||||
anonymizedTypes.add("Prénoms");
|
||||
anonymizedTypes.add("Noms de famille");
|
||||
anonymizedTypes.add("Noms complets");
|
||||
}
|
||||
|
||||
// Noms (Prénoms, Noms de famille, Noms complets)
|
||||
// Emails (EMAIL_ADDRESS)
|
||||
if (outputText.includes("<EMAIL_ADDRESS>")) {
|
||||
anonymizedTypes.add("Adresses e-mail");
|
||||
}
|
||||
|
||||
// Téléphones (PHONE_NUMBER)
|
||||
if (outputText.includes("<PHONE_NUMBER>")) {
|
||||
anonymizedTypes.add("Numéros de téléphone");
|
||||
}
|
||||
|
||||
// Adresses (LOCATION)
|
||||
if (outputText.includes("<LOCATION>")) {
|
||||
anonymizedTypes.add("Adresses");
|
||||
}
|
||||
|
||||
// IBAN (IBAN)
|
||||
if (outputText.includes("<IBAN>")) {
|
||||
anonymizedTypes.add("Numéros d'ID"); // Ou créer une nouvelle catégorie "IBAN"
|
||||
}
|
||||
|
||||
// Organisations (ORGANIZATION)
|
||||
if (outputText.includes("<ORGANIZATION>")) {
|
||||
anonymizedTypes.add("Noms de domaine"); // Ou adapter selon vos besoins
|
||||
}
|
||||
|
||||
// Dates personnalisées (CUSTOM_DATE)
|
||||
if (outputText.includes("<CUSTOM_DATE>")) {
|
||||
anonymizedTypes.add("Dates");
|
||||
}
|
||||
|
||||
// Numéros d'entreprise belges (BE_ENTERPRISE_NUMBER)
|
||||
if (outputText.includes("<BE_ENTERPRISE_NUMBER>")) {
|
||||
anonymizedTypes.add("Numéros d'ID");
|
||||
}
|
||||
|
||||
// ✅ ANCIENS PATTERNS (pour compatibilité)
|
||||
|
||||
// Noms (anciens patterns [Nom1], [Nom2]...)
|
||||
if (outputText.includes("[Nom1]") || outputText.includes("[Nom")) {
|
||||
anonymizedTypes.add("Prénoms");
|
||||
anonymizedTypes.add("Noms de famille");
|
||||
anonymizedTypes.add("Noms complets");
|
||||
}
|
||||
|
||||
// Emails
|
||||
// Emails (anciens patterns)
|
||||
if (outputText.includes("[Email1]") || outputText.includes("[Email")) {
|
||||
anonymizedTypes.add("Adresses e-mail");
|
||||
}
|
||||
|
||||
// Téléphones
|
||||
if (
|
||||
outputText.includes("[Téléphone1]") ||
|
||||
outputText.includes("[Téléphone")
|
||||
) {
|
||||
// Téléphones (anciens patterns)
|
||||
if (outputText.includes("[Téléphone1]") || outputText.includes("[Téléphone")) {
|
||||
anonymizedTypes.add("Numéros de téléphone");
|
||||
}
|
||||
|
||||
// Adresses
|
||||
// Adresses (anciens patterns)
|
||||
if (outputText.includes("[Adresse1]") || outputText.includes("[Adresse")) {
|
||||
anonymizedTypes.add("Adresses");
|
||||
}
|
||||
|
||||
// Numéros d'ID / Sécurité sociale
|
||||
// Numéros d'ID / Sécurité sociale (anciens patterns)
|
||||
if (
|
||||
outputText.includes("[NuméroSS1]") ||
|
||||
outputText.includes("[NuméroSS") ||
|
||||
@@ -53,14 +94,6 @@ export const AnonymizationInterface = ({
|
||||
anonymizedTypes.add("Numéros d'ID");
|
||||
}
|
||||
|
||||
// Dates
|
||||
if (
|
||||
outputText.includes("[Date") ||
|
||||
/\[\d{2}\/\d{2}\/\d{4}\]/.test(outputText)
|
||||
) {
|
||||
anonymizedTypes.add("Dates");
|
||||
}
|
||||
|
||||
// Valeurs monétaires
|
||||
if (outputText.includes("[Montant") || /\[\d+[€$]\]/.test(outputText)) {
|
||||
anonymizedTypes.add("Valeurs monétaires");
|
||||
|
||||
@@ -8,6 +8,29 @@ interface EntityMapping {
|
||||
endIndex: number;
|
||||
}
|
||||
|
||||
// Shape of one entry of `analyzerResults` in the /api/process-document response
// (one entity detected by the Presidio Analyzer).
|
||||
interface PresidioAnalyzerResult {
|
||||
// Entity label; copied into EntityMapping.entityType and used to build the
// `[<entity_type><n>]` placeholder shown as the anonymized value.
entity_type: string;
|
||||
// Start offset passed to `substring()` on the response `text` to recover the original value.
start: number;
|
||||
// End offset passed to `substring()` on the response `text` (exclusive, per substring semantics).
end: number;
|
||||
// NOTE(review): presumably the detection confidence reported by Presidio;
// not read anywhere in the visible code — confirm range and meaning.
score: number;
|
||||
// Optional diagnostic details; not read anywhere in the visible code.
// NOTE(review): field meanings below are assumed from their names — confirm against the backend.
analysis_explanation?: {
|
||||
recognizer: string;
|
||||
pattern_name?: string;
|
||||
pattern?: string;
|
||||
validation_result?: boolean;
|
||||
};
|
||||
}
|
||||
|
||||
// JSON body returned by POST /api/process-document. Every field is optional;
// the caller branches on which ones are present.
|
||||
interface ProcessDocumentResponse {
|
||||
// Text the analyzer offsets are applied to; also displayed as-is (with a warning)
// when no `anonymizedText` is returned.
text?: string;
|
||||
// Anonymized output; when present it is what gets displayed to the user.
anonymizedText?: string;
|
||||
// NOTE(review): presumably the number of PII entities found; not read in the visible code — confirm.
piiCount?: number;
|
||||
// Detected entities; converted to EntityMapping[] only when `text` is also present.
analyzerResults?: PresidioAnalyzerResult[];
|
||||
// When set, the caller throws an Error carrying this message.
error?: string;
|
||||
}
|
||||
|
||||
interface AnonymizationLogicProps {
|
||||
sourceText: string;
|
||||
fileContent: string;
|
||||
@@ -31,9 +54,7 @@ export const useAnonymization = ({
|
||||
const textToProcess = sourceText || fileContent || "";
|
||||
|
||||
if (!textToProcess.trim()) {
|
||||
setError(
|
||||
"Veuillez saisir du texte à anonymiser ou télécharger un fichier"
|
||||
);
|
||||
setError("Veuillez saisir du texte à anonymiser ou télécharger un fichier");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -43,131 +64,65 @@ export const useAnonymization = ({
|
||||
setEntityMappings([]);
|
||||
|
||||
try {
|
||||
if (
|
||||
uploadedFile &&
|
||||
uploadedFile.type === "application/pdf" &&
|
||||
!fileContent
|
||||
) {
|
||||
const formData = new FormData();
|
||||
formData.append("file", uploadedFile);
|
||||
|
||||
const response = await fetch("/api/process-document", {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Erreur lors du traitement du PDF");
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data.error) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
if (data.anonymizedText) {
|
||||
setOutputText(data.anonymizedText);
|
||||
// TODO: Extraire les mappings depuis les résultats Presidio
|
||||
setIsProcessing(false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 1500));
|
||||
|
||||
// Simulation des mappings pour le fallback
|
||||
const mappings: EntityMapping[] = [];
|
||||
let anonymized = textToProcess;
|
||||
console.log("🚀 Début anonymisation avec Presidio");
|
||||
|
||||
// Noms
|
||||
const nameMatches = textToProcess.matchAll(/\b[A-Z][a-z]+ [A-Z][a-z]+\b/g);
|
||||
let nameCounter = 1;
|
||||
for (const match of nameMatches) {
|
||||
const replacement = `[Nom${nameCounter}]`;
|
||||
mappings.push({
|
||||
originalValue: match[0],
|
||||
anonymizedValue: replacement,
|
||||
entityType: "PERSON",
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
});
|
||||
anonymized = anonymized.replace(match[0], replacement);
|
||||
nameCounter++;
|
||||
const formData = new FormData();
|
||||
|
||||
if (uploadedFile) {
|
||||
console.log("📁 Traitement fichier:", uploadedFile.name);
|
||||
formData.append("file", uploadedFile);
|
||||
} else {
|
||||
console.log("📝 Traitement texte saisi");
|
||||
const textBlob = new Blob([textToProcess], { type: "text/plain" });
|
||||
const textFile = new File([textBlob], "input.txt", { type: "text/plain" });
|
||||
formData.append("file", textFile);
|
||||
}
|
||||
|
||||
// Téléphones
|
||||
const phoneMatches = textToProcess.matchAll(/\b0[1-9](?:[\s.-]?\d{2}){4}\b/g);
|
||||
let phoneCounter = 1;
|
||||
for (const match of phoneMatches) {
|
||||
const replacement = `[Téléphone${phoneCounter}]`;
|
||||
mappings.push({
|
||||
originalValue: match[0],
|
||||
anonymizedValue: replacement,
|
||||
entityType: "PHONE_NUMBER",
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
});
|
||||
anonymized = anonymized.replace(match[0], replacement);
|
||||
phoneCounter++;
|
||||
console.log("🔍 Appel à /api/process-document avec Presidio...");
|
||||
const response = await fetch("/api/process-document", {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Erreur API: ${response.status}`);
|
||||
}
|
||||
|
||||
// Emails
|
||||
const emailMatches = textToProcess.matchAll(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g);
|
||||
let emailCounter = 1;
|
||||
for (const match of emailMatches) {
|
||||
const replacement = `[Email${emailCounter}]`;
|
||||
mappings.push({
|
||||
originalValue: match[0],
|
||||
anonymizedValue: replacement,
|
||||
entityType: "EMAIL_ADDRESS",
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
});
|
||||
anonymized = anonymized.replace(match[0], replacement);
|
||||
emailCounter++;
|
||||
const data: ProcessDocumentResponse = await response.json();
|
||||
console.log("📊 Réponse API:", data);
|
||||
|
||||
if (data.error) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
// Adresses
|
||||
const addressMatches = textToProcess.matchAll(/\b\d{1,3}\s+[a-zA-Z\s]+,\s*\d{5}\s+[a-zA-Z\s]+\b/g);
|
||||
let addressCounter = 1;
|
||||
for (const match of addressMatches) {
|
||||
const replacement = `[Adresse${addressCounter}]`;
|
||||
mappings.push({
|
||||
originalValue: match[0],
|
||||
anonymizedValue: replacement,
|
||||
entityType: "LOCATION",
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
});
|
||||
anonymized = anonymized.replace(match[0], replacement);
|
||||
addressCounter++;
|
||||
if (data.anonymizedText) {
|
||||
console.log("✅ Anonymisation réussie avec Presidio");
|
||||
setOutputText(data.anonymizedText);
|
||||
|
||||
// Extraire les mappings depuis les résultats Presidio (plus d'erreur 'any')
|
||||
if (data.analyzerResults && data.text) {
|
||||
const mappings: EntityMapping[] = data.analyzerResults.map((entity: PresidioAnalyzerResult, index: number) => ({
|
||||
originalValue: data.text!.substring(entity.start, entity.end),
|
||||
anonymizedValue: `[${entity.entity_type}${index + 1}]`,
|
||||
entityType: entity.entity_type,
|
||||
startIndex: entity.start,
|
||||
endIndex: entity.end
|
||||
}));
|
||||
setEntityMappings(mappings);
|
||||
console.log("📋 Entités détectées:", mappings.length);
|
||||
console.log("🔍 Détails des entités:", mappings);
|
||||
}
|
||||
} else if (data.text) {
|
||||
console.log("⚠️ Fallback: Presidio non disponible, texte original retourné");
|
||||
setOutputText(data.text);
|
||||
setError("Presidio temporairement indisponible. Texte non anonymisé.");
|
||||
}
|
||||
|
||||
// Numéros de sécurité sociale
|
||||
const ssnMatches = textToProcess.matchAll(/\b\d\s\d{2}\s\d{2}\s\d{2}\s\d{3}\s\d{3}\s\d{2}\b/g);
|
||||
let ssnCounter = 1;
|
||||
for (const match of ssnMatches) {
|
||||
const replacement = `[NuméroSS${ssnCounter}]`;
|
||||
mappings.push({
|
||||
originalValue: match[0],
|
||||
anonymizedValue: replacement,
|
||||
entityType: "FR_NIR",
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
});
|
||||
anonymized = anonymized.replace(match[0], replacement);
|
||||
ssnCounter++;
|
||||
}
|
||||
|
||||
setOutputText(anonymized);
|
||||
setEntityMappings(mappings);
|
||||
} catch (error) {
|
||||
console.error("Erreur anonymisation:", error);
|
||||
console.error("❌ Erreur anonymisation:", error);
|
||||
setError(
|
||||
error instanceof Error
|
||||
? error.message
|
||||
: "Erreur lors de l'anonymisation"
|
||||
? `Erreur Presidio: ${error.message}`
|
||||
: "Erreur lors de l'anonymisation avec Presidio"
|
||||
);
|
||||
} finally {
|
||||
setIsProcessing(false);
|
||||
|
||||
Reference in New Issue
Block a user