Files
Anonyme/app/api/process-document/route.ts
2025-06-18 14:57:11 +02:00

287 lines
8.4 KiB
TypeScript

import { NextResponse, type NextRequest } from "next/server";
import pdf from "pdf-parse/lib/pdf-parse";
import mammoth from "mammoth";
/** MIME type reported for PDF uploads. */
const PDF_MIME_TYPE = "application/pdf";

/** MIME type reported for Word (.docx) uploads. */
const DOCX_MIME_TYPE =
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

/** Endpoint of the Presidio Analyzer service (detects PII spans). */
const PRESIDIO_ANALYZER_URL =
  "http://ocs00s000ssow8kssossocco.51.68.233.212.sslip.io/analyze";

/** Endpoint of the Presidio Anonymizer service (rewrites the detected spans). */
const PRESIDIO_ANONYMIZER_URL =
  "http://r8gko4kcwwk4sso40cc0gkg8.51.68.233.212.sslip.io/anonymize";

/** Headers sent with every JSON request to the Presidio services. */
const PRESIDIO_REQUEST_HEADERS = {
  "Content-Type": "application/json",
  Accept: "application/json",
};

/**
 * Regex-based recognizers sent to the analyzer with every request, in
 * addition to whatever recognizers the analyzer service has built in.
 */
const AD_HOC_RECOGNIZERS = [
  {
    name: "BelgianNRNRecognizer",
    supported_entity: "BE_NATIONAL_REGISTER_NUMBER",
    supported_language: "fr",
    patterns: [
      {
        name: "NRN_Pattern",
        // YYMMDD + 3-digit sequence + 2-digit checksum. The separators of
        // the usual written form (YY.MM.DD-XXX.XX) are all optional so both
        // the dotted and the compact notations are recognised.
        regex:
          "\\b[0-9]{2}\\.?(?:0[1-9]|1[0-2])\\.?(?:0[1-9]|[12][0-9]|3[01])-?\\d{3}\\.?\\d{2}\\b",
        score: 1.0,
      },
    ],
    context: ["registre national", "nrn", "niss"],
  },
  {
    name: "BelgianEnterpriseRecognizer",
    supported_entity: "BE_ENTERPRISE_NUMBER",
    supported_language: "fr",
    patterns: [
      {
        name: "BTW_Pattern",
        regex: "\\bBE\\s?0\\d{3}\\.\\d{3}\\.\\d{3}\\b",
        score: 0.95,
      },
    ],
    context: ["entreprise", "btw", "tva"],
  },
  {
    name: "IBANRecognizer",
    supported_entity: "IBAN",
    supported_language: "fr",
    patterns: [
      {
        name: "IBAN_Pattern",
        // Country code + 2 check digits, then 3 to 7 groups of four digits
        // and an optional shorter last group. The lower bound of 3 groups
        // is what lets 16-character Belgian IBANs (BE68 5390 0754 7034)
        // and 20-character all-digit Luxembourg IBANs match.
        regex: "\\b[A-Z]{2}\\d{2}(?:\\s?\\d{4}){3,7}(?:\\s?\\d{1,4})?\\b",
        score: 0.95,
      },
    ],
    context: ["iban", "compte", "bancaire"],
  },
  {
    name: "PhoneRecognizer",
    supported_entity: "PHONE_NUMBER",
    supported_language: "fr",
    patterns: [
      {
        name: "Phone_Pattern",
        regex:
          "\\b(?:(?:\\+|00)(?:32|33|352)|0)\\s?[1-9](?:[\\s.-]?\\d{2}){3,4}\\b",
        score: 0.8,
      },
    ],
    context: ["téléphone", "tel", "mobile", "gsm"],
  },
  {
    name: "EmailRecognizer",
    supported_entity: "EMAIL_ADDRESS",
    supported_language: "fr",
    patterns: [
      {
        name: "Email_Pattern",
        // The TLD class is [A-Za-z]; a "|" inside a character class is a
        // literal pipe, not an alternation.
        regex: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b",
        score: 1.0,
      },
    ],
    context: ["email", "courriel", "adresse électronique"],
  },
];

/** Outcome of text extraction: the text, or the error message to return. */
type ExtractionOutcome =
  | { ok: true; text: string }
  | { ok: false; error: string };

/** Returns the message of a caught value, or a generic fallback. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : "Erreur inconnue";
}

/** Type guard: true when `value` is an object carrying a string `text`. */
function hasTextField(value: unknown): value is { text: string } {
  return (
    typeof value === "object" &&
    value !== null &&
    "text" in value &&
    typeof value.text === "string"
  );
}

/**
 * Extracts the plain text of an uploaded file.
 *
 * Dispatches on the MIME type reported by the upload: PDFs go through
 * `pdf-parse`, .docx files through `mammoth`, and every other type is read
 * as text via `File.text()`.
 *
 * @param file - The uploaded file.
 * @returns The extracted text, or the client-facing error message when the
 *   extraction threw.
 */
async function extractText(file: File): Promise<ExtractionOutcome> {
  const fileType = file.type;
  console.log("🔍 Type de fichier:", fileType);

  if (fileType === PDF_MIME_TYPE) {
    console.log("📄 Traitement PDF en cours...");
    try {
      const buffer = Buffer.from(await file.arrayBuffer());
      console.log("📊 Taille du buffer:", buffer.length);
      const data = await pdf(buffer);
      const text = data.text;
      console.log("✅ Extraction PDF réussie, longueur du texte:", text.length);
      return { ok: true, text };
    } catch (pdfError) {
      console.error("❌ Erreur PDF:", pdfError);
      return {
        ok: false,
        error: `Erreur lors du traitement du PDF: ${describeError(pdfError)}`,
      };
    }
  }

  if (fileType === DOCX_MIME_TYPE) {
    console.log("📝 Traitement Word en cours...");
    try {
      const arrayBuffer = await file.arrayBuffer();
      const extracted = await mammoth.extractRawText({ arrayBuffer });
      const text = extracted.value;
      console.log(
        "✅ Extraction Word réussie, longueur du texte:",
        text.length
      );
      return { ok: true, text };
    } catch (wordError) {
      console.error("❌ Erreur Word:", wordError);
      return {
        ok: false,
        error: `Erreur lors du traitement du document Word: ${describeError(
          wordError
        )}`,
      };
    }
  }

  // Any other MIME type (including an empty one) is read as text.
  console.log("📄 Traitement texte en cours...");
  try {
    const text = await file.text();
    console.log("✅ Extraction texte réussie, longueur:", text.length);
    return { ok: true, text };
  } catch (textError) {
    console.error("❌ Erreur texte:", textError);
    return {
      ok: false,
      error: `Erreur lors de la lecture du fichier texte: ${describeError(
        textError
      )}`,
    };
  }
}

/**
 * Handles a document upload: extracts its text, asks the Presidio Analyzer
 * for PII spans, then asks the Presidio Anonymizer to rewrite them.
 *
 * Expects a form-data body with the document under the `file` field.
 *
 * @param req - The incoming request.
 * @returns On success, status 200 with `{ anonymizedText, piiCount }`.
 *   Status 400 with `{ error }` when no file was uploaded or no text could
 *   be extracted. Status 500 with `{ error }` when extraction fails, when a
 *   Presidio service answers with a non-2xx status or an unusable body, or
 *   on any other exception.
 */
export async function POST(req: NextRequest) {
  console.log("🔍 Début du traitement de la requête");
  try {
    const formData = await req.formData();
    // FormData.get() yields a string for plain text fields; only a real
    // file entry is accepted, anything else is treated as "no file".
    const entry = formData.get("file");
    const file = typeof entry === "string" ? null : entry;
    console.log("📁 Fichier reçu:", file?.name, file?.type);
    if (!file) {
      console.log("❌ Aucun fichier reçu");
      return NextResponse.json(
        { error: "Aucun fichier reçu." },
        { status: 400 }
      );
    }

    const extraction = await extractText(file);
    if (!extraction.ok) {
      return NextResponse.json({ error: extraction.error }, { status: 500 });
    }
    const fileContent = extraction.text;

    // Reject documents from which no text could be extracted.
    if (!fileContent || fileContent.trim().length === 0) {
      console.log("⚠️ Contenu vide détecté");
      return NextResponse.json(
        { error: "Le fichier ne contient pas de texte extractible." },
        { status: 400 }
      );
    }
    console.log("🔍 Contenu extrait, longueur:", fileContent.length);

    // Step 1: detect the PII spans.
    console.log("🔍 Appel à Presidio Analyzer...");
    const analyzeResponse = await fetch(PRESIDIO_ANALYZER_URL, {
      method: "POST",
      headers: PRESIDIO_REQUEST_HEADERS,
      body: JSON.stringify({
        text: fileContent,
        language: "fr",
        ad_hoc_recognizers: AD_HOC_RECOGNIZERS,
      }),
    });
    console.log("📊 Statut Analyzer:", analyzeResponse.status);
    if (!analyzeResponse.ok) {
      const errorBody = await analyzeResponse.text();
      console.error("❌ Erreur Analyzer:", errorBody);
      return NextResponse.json(
        {
          error: `Erreur de l'analyseur Presidio (${analyzeResponse.status}): ${errorBody}`,
        },
        { status: 500 }
      );
    }

    let analyzerResults: unknown[];
    try {
      const parsed: unknown = await analyzeResponse.json();
      // The result list is forwarded to the anonymizer and counted below,
      // so anything other than an array is handled as a malformed reply.
      if (!Array.isArray(parsed)) {
        throw new TypeError("Analyzer response is not a JSON array");
      }
      analyzerResults = parsed;
      console.log("✅ Analyzer réussi, résultats:", analyzerResults.length);
    } catch (jsonError) {
      console.error("❌ Erreur parsing JSON Analyzer:", jsonError);
      return NextResponse.json(
        { error: "Erreur lors du parsing de la réponse de l'analyseur" },
        { status: 500 }
      );
    }

    // Step 2: rewrite the detected spans.
    console.log("🔍 Appel à Presidio Anonymizer...");
    const anonymizeResponse = await fetch(PRESIDIO_ANONYMIZER_URL, {
      method: "POST",
      headers: PRESIDIO_REQUEST_HEADERS,
      body: JSON.stringify({
        text: fileContent,
        analyzer_results: analyzerResults,
      }),
    });
    console.log("📊 Statut Anonymizer:", anonymizeResponse.status);
    if (!anonymizeResponse.ok) {
      const errorBody = await anonymizeResponse.text();
      console.error("❌ Erreur Anonymizer:", errorBody);
      return NextResponse.json(
        {
          error: `Erreur de l'anonymiseur Presidio (${anonymizeResponse.status}): ${errorBody}`,
        },
        { status: 500 }
      );
    }

    let anonymizedText: string;
    try {
      const parsed: unknown = await anonymizeResponse.json();
      // Without a string `text` the success payload would silently lose
      // its `anonymizedText` field, so treat that as a malformed reply.
      if (!hasTextField(parsed)) {
        throw new TypeError("Anonymizer response has no string 'text' field");
      }
      anonymizedText = parsed.text;
      console.log("✅ Anonymizer réussi");
    } catch (jsonError) {
      console.error("❌ Erreur parsing JSON Anonymizer:", jsonError);
      return NextResponse.json(
        { error: "Erreur lors du parsing de la réponse de l'anonymiseur" },
        { status: 500 }
      );
    }

    const result = {
      anonymizedText,
      piiCount: analyzerResults.length,
    };
    console.log("✅ Traitement terminé avec succès");
    return NextResponse.json(result, {
      status: 200,
      headers: {
        "Content-Type": "application/json",
      },
    });
  } catch (err: unknown) {
    // Last-resort handler: failure to read the form data, network errors
    // from fetch, and anything else not handled above.
    console.error("❌ Erreur générale:", err);
    const errorMessage =
      err instanceof Error
        ? err.message
        : "Une erreur inconnue est survenue sur le serveur.";
    return NextResponse.json(
      { error: errorMessage },
      {
        status: 500,
        headers: {
          "Content-Type": "application/json",
        },
      }
    );
  }
}