// Anonyme/app/api/process-document/route.ts

import { NextResponse, type NextRequest } from "next/server";
import pdf from "pdf-parse"; // ✅ Import correct
import mammoth from "mammoth";

/**
 * One entity span as consumed from the analyzer response.
 * `start`/`end` are used as offsets into the text that was sent for analysis.
 */
interface AnalyzerResult {
  entity_type: string;
  start: number;
  end: number;
  score: number;
}

/** Returns the message of a thrown value, or a generic French fallback when it is not an Error. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : "Erreur inconnue";
}

/** Sends `payload` as a JSON POST request and returns the raw fetch response. */
function postJson(url: string, payload: unknown): Promise<Response> {
  return fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json",
    },
    body: JSON.stringify(payload),
  });
}

/**
 * Builds a map "original entity text" -> "bracketed placeholder" by walking the
 * original and anonymized texts in parallel.
 *
 * The walk relies on the text between two entities being identical in both
 * strings, and on each replaced entity appearing as `[...]` in the anonymized
 * text. An entity whose position does not start with `[` is treated as left
 * unchanged (same length in both texts).
 *
 * @param originalText    Text that was sent to the analyzer.
 * @param anonymizedText  Text returned by the anonymizer.
 * @param analyzerResults Entity spans reported for `originalText`.
 * @returns Map from original entity text to its placeholder. If the same
 *          original text occurs several times, the last placeholder wins.
 */
function extractReplacementValues(
  originalText: string,
  anonymizedText: string,
  analyzerResults: readonly AnalyzerResult[]
): Record<string, string> {
  const replacementMap: Record<string, string> = {};

  // Drop malformed spans before sorting: a NaN/undefined offset would make the
  // comparator inconsistent and break the alignment arithmetic below.
  const spans = analyzerResults
    .filter(
      (r) =>
        r != null &&
        Number.isInteger(r.start) &&
        Number.isInteger(r.end) &&
        r.start >= 0 &&
        r.end > r.start &&
        r.end <= originalText.length
    )
    // Widest span first when several start at the same offset.
    // NOTE(review): assumes the anonymizer keeps the widest of overlapping
    // spans — confirm against the service actually deployed.
    .sort((a, b) => a.start - b.start || b.end - a.end);

  let originalIndex = 0;
  let anonymizedIndex = 0;

  for (const { start, end } of spans) {
    // A span starting inside a region already consumed overlaps the previous
    // entity. Processing it would move originalIndex backwards and desync
    // every later entity, so it is skipped.
    if (start < originalIndex) continue;

    // Unchanged text between entities has the same length on both sides.
    anonymizedIndex += start - originalIndex;
    originalIndex = start;

    if (anonymizedText.charAt(anonymizedIndex) === "[") {
      const closing = anonymizedText.indexOf("]", anonymizedIndex);
      // Unterminated placeholder: alignment cannot be recovered, stop here
      // rather than mapping an entity to the remainder of the document.
      if (closing === -1) break;

      replacementMap[originalText.substring(start, end)] =
        anonymizedText.substring(anonymizedIndex, closing + 1);
      anonymizedIndex = closing + 1;
    } else {
      // No placeholder at this position: entity assumed left as-is.
      anonymizedIndex += end - start;
    }
    originalIndex = end;
  }

  return replacementMap;
}

/**
 * POST handler: extracts text from an uploaded document and anonymizes it.
 *
 * Expects multipart form data with a `file` field (PDF, DOCX, or anything
 * readable as text). When the `x-simple-extraction: true` header is set, only
 * the extracted text is returned.
 *
 * Responses:
 * - 200 `{ text, anonymizedText, piiCount, analyzerResults, replacementValues }`
 *   on full success.
 * - 200 `{ text }` for simple extraction, or as a best-effort fallback when the
 *   anonymization service fails or returns an unexpected payload.
 * - 400 `{ error }` for a missing/empty/oversized file or no extractable text.
 * - 500 `{ error }` when text extraction itself throws, or on unexpected errors.
 */
export async function POST(req: NextRequest) {
  console.log("🔍 Début du traitement de la requête");

  try {
    const formData = await req.formData();
    const entry = formData.get("file");

    // FormData entries are either strings or files; a plain string field named
    // "file" must be rejected here instead of failing later on file.text().
    if (!entry || typeof entry === "string") {
      return NextResponse.json(
        { error: "Aucun fichier reçu." },
        { status: 400 }
      );
    }
    const file = entry;

    if (file.size === 0) {
      return NextResponse.json(
        { error: "Le fichier est vide (0 bytes)." },
        { status: 400 }
      );
    }

    // 50 MB upper bound.
    if (file.size > 50 * 1024 * 1024) {
      return NextResponse.json(
        { error: "Le fichier est trop volumineux (max 50MB)." },
        { status: 400 }
      );
    }

    console.log("📁 Fichier reçu:", {
      name: file.name,
      type: file.type,
      size: `${(file.size / 1024 / 1024).toFixed(2)} MB`,
      lastModified: new Date(file.lastModified).toISOString(),
    });

    let fileContent = "";
    const fileType = file.type;

    // --- Text extraction, dispatched on the declared MIME type ---
    if (fileType === "application/pdf") {
      console.log("📄 Traitement PDF en cours...");
      console.log("📊 Taille du fichier:", file.size, "bytes");

      try {
        const buffer = Buffer.from(await file.arrayBuffer());
        console.log("📦 Buffer créé, taille:", buffer.length);

        const data = await pdf(buffer);
        fileContent = data.text || "";

        console.log("✅ Extraction PDF réussie, longueur:", fileContent.length);
        console.log("📄 Nombre de pages:", data.numpages);
        console.log("ℹ️ Info PDF:", data.info?.Title || "Titre non disponible");

        if (!fileContent.trim()) {
          console.log("⚠️ PDF vide - Détails:", {
            pages: data.numpages,
            metadata: data.metadata,
            info: data.info,
            extractedLength: fileContent.length,
          });

          // PDF metadata fields are not guaranteed to be strings; normalise
          // them so the heuristics below cannot throw.
          const creator =
            typeof data.info?.Creator === "string" ? data.info.Creator : "";
          const producer =
            typeof data.info?.Producer === "string" ? data.info.Producer : "";

          // Heuristic: scanner vendor names in the metadata, or pages with
          // (almost) no text, suggest an image-only scanned document.
          const isScanned =
            ["RICOH", "Canon", "HP"].some((vendor) =>
              creator.includes(vendor)
            ) ||
            producer.includes("Scanner") ||
            (data.numpages > 0 && fileContent.length < 50);

          // Avoid printing "undefined" when the creator is unknown.
          const errorMessage = isScanned
            ? `Ce PDF semble être un document scanné (créé par: ${
                creator || "inconnu"
              }). Les documents scannés contiennent des images de texte, pas du texte extractible.\n\n💡 Solutions :\n- Utilisez un PDF créé depuis Word/Google Docs\n- Appliquez l'OCR avec Adobe Acrobat\n- Recréez le document au lieu de le scanner`
            : `Le PDF ne contient pas de texte extractible.\n\nCela peut être dû à :\n- PDF scanné (image uniquement)\n- PDF protégé\n- PDF avec texte en images\n- Nombre de pages: ${data.numpages}`;

          return NextResponse.json({ error: errorMessage }, { status: 400 });
        }
      } catch (pdfError) {
        console.error("❌ Erreur PDF détaillée:", {
          message: describeError(pdfError),
          stack: pdfError instanceof Error ? pdfError.stack : undefined,
          fileName: file.name,
          fileSize: file.size,
          fileType: file.type,
        });

        return NextResponse.json(
          {
            error: `Impossible de traiter ce PDF (${
              file.name
            }). Erreur: ${describeError(
              pdfError
            )}. Vérifiez que le PDF n'est pas protégé, corrompu ou scanné.`,
          },
          { status: 500 }
        );
      }
    } else if (
      fileType ===
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ) {
      console.log("📝 Traitement Word en cours...");
      try {
        // On Node, mammoth reads its input from `buffer` (or `path`);
        // `arrayBuffer` is only understood by its browser build.
        const buffer = Buffer.from(await file.arrayBuffer());
        const result = await mammoth.extractRawText({ buffer });
        fileContent = result.value || "";
        console.log(
          "✅ Extraction Word réussie, longueur:",
          fileContent.length
        );
      } catch (wordError) {
        console.error("❌ Erreur Word:", wordError);
        return NextResponse.json(
          { error: `Erreur traitement Word: ${describeError(wordError)}` },
          { status: 500 }
        );
      }
    } else {
      // Any other MIME type is read as plain text.
      console.log("📄 Traitement texte en cours...");
      try {
        fileContent = await file.text();
        console.log(
          "✅ Extraction texte réussie, longueur:",
          fileContent.length
        );
      } catch (textError) {
        console.error("❌ Erreur texte:", textError);
        return NextResponse.json(
          { error: `Erreur lecture texte: ${describeError(textError)}` },
          { status: 500 }
        );
      }
    }

    if (!fileContent || fileContent.trim().length === 0) {
      console.log("⚠️ Contenu vide détecté");
      return NextResponse.json(
        { error: "Le fichier ne contient pas de texte extractible." },
        { status: 400 }
      );
    }

    // Best-effort response used for simple extraction and whenever the
    // anonymization service cannot be used: the extracted text only.
    const rawTextResponse = () =>
      NextResponse.json({ text: fileContent }, { status: 200 });

    // Callers that only need the extracted text opt out of anonymization.
    if (req.headers.get("x-simple-extraction") === "true") {
      return rawTextResponse();
    }

    // --- Anonymization through the Presidio-style service ---
    const presidioAnalyzerUrl = "http://localhost:5001/analyze";
    const presidioAnonymizerUrl = "http://localhost:5001/anonymize";

    const analyzerConfig = {
      text: fileContent,
      language: "fr",
    };

    console.log("🔍 Appel à Presidio Analyzer...");

    try {
      const analyzeResponse = await postJson(
        presidioAnalyzerUrl,
        analyzerConfig
      );

      console.log("📊 Statut Analyzer:", analyzeResponse.status);
      console.log("📊 Headers Analyzer:", analyzeResponse.headers);

      if (!analyzeResponse.ok) {
        const errorBody = await analyzeResponse.text();
        console.error("❌ Erreur Analyzer:", errorBody);
        console.error("❌ URL utilisée:", presidioAnalyzerUrl);
        // Log only non-sensitive request details: the document text itself
        // must not end up in server logs.
        console.error("❌ Config envoyée:", {
          language: analyzerConfig.language,
          textLength: analyzerConfig.text.length,
        });
        return rawTextResponse();
      }

      const analyzerResults: unknown = await analyzeResponse.json();
      if (!Array.isArray(analyzerResults)) {
        console.error(
          "❌ Erreur Analyzer:",
          "réponse inattendue (tableau attendu)"
        );
        return rawTextResponse();
      }
      console.log("✅ Analyzer a trouvé", analyzerResults.length, "entités.");

      const anonymizerConfig = {
        text: fileContent,
        analyzer_results: analyzerResults,
      };

      console.log("🔍 Appel à Presidio Anonymizer...");
      const anonymizeResponse = await postJson(
        presidioAnonymizerUrl,
        anonymizerConfig
      );

      console.log("📊 Statut Anonymizer:", anonymizeResponse.status);
      if (!anonymizeResponse.ok) {
        const errorBody = await anonymizeResponse.text();
        console.error("❌ Erreur Anonymizer:", errorBody);
        return rawTextResponse();
      }

      const anonymizerResult = await anonymizeResponse.json();
      const anonymizedText: unknown = anonymizerResult?.anonymized_text;
      if (typeof anonymizedText !== "string") {
        console.error(
          "❌ Erreur Anonymizer:",
          "réponse inattendue (anonymized_text manquant)"
        );
        return rawTextResponse();
      }
      console.log("✅ Anonymisation réussie.");

      const replacementValues = extractReplacementValues(
        fileContent,
        anonymizedText,
        analyzerResults as AnalyzerResult[]
      );

      // Log the count only: the map's keys are the original PII values.
      console.log(
        "🔧 Valeurs de remplacement extraites:",
        Object.keys(replacementValues).length
      );

      return NextResponse.json(
        {
          text: fileContent,
          anonymizedText,
          piiCount: analyzerResults.length,
          analyzerResults,
          replacementValues,
        },
        { status: 200 }
      );
    } catch (presidioError) {
      console.error("❌ Erreur Presidio:", presidioError);
      // Service unreachable or response unusable: fall back to the raw text.
      return rawTextResponse();
    }
  } catch (err: unknown) {
    console.error("❌ Erreur générale:", err);
    return NextResponse.json(
      {
        error: err instanceof Error ? err.message : "Erreur serveur inconnue.",
      },
      { status: 500 }
    );
  }
}