Spaces:

Seth0330
/

AIEXTRACT1

Running

App Files Files Community

Seth0330 committed 9 days ago

Commit

fafa006

verified ·

1 Parent(s): b90a9db

Update frontend/src/components/ExportButtons.jsx

Browse files

Files changed (1) hide show

frontend/src/components/ExportButtons.jsx +153 -4

frontend/src/components/ExportButtons.jsx CHANGED Viewed

@@ -21,16 +21,165 @@ import {
 } from "@/components/ui/dropdown-menu";
 import { cn } from "@/lib/utils";
-export default function ExportButtons({ isComplete }) {
   const [downloading, setDownloading] = useState(null);
   const [copied, setCopied] = useState(false);
   const handleDownload = (format) => {
     setDownloading(format);
-    // Simulate download
-    setTimeout(() => {
       setDownloading(null);
-    }, 1500);
   };
   const handleCopyLink = () => {

 } from "@/components/ui/dropdown-menu";
 import { cn } from "@/lib/utils";
+// Helper functions from ExtractionOutput
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 *
 * Used to decide whether a page-level field repeats a top-level field. Plain
 * `===` is not enough there: two separately built objects or arrays with the
 * same content are different references.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {boolean} True when both values have the same shape and content.
 */
function fieldValuesMatch(a, b) {
  if (a === b) {
    return true;
  }
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") {
    return false;
  }
  if (Array.isArray(a) !== Array.isArray(b)) {
    return false;
  }
  const keysA = Object.keys(a);
  if (keysA.length !== Object.keys(b).length) {
    return false;
  }
  return keysA.every(
    (key) => Object.prototype.hasOwnProperty.call(b, key) && fieldValuesMatch(a[key], b[key])
  );
}

/**
 * Build an export-ready copy of an extraction `fields` object.
 *
 * - When a non-empty `pages` array is present, the top-level `full_text` and
 *   each page's `fields.full_text` are dropped.
 * - For the "json" and "xml" formats the `pages` array is replaced by one
 *   top-level entry per page (`page_1`, `page_2`, ...), each holding `text`,
 *   `confidence`, `doc_type` and, only when non-empty, the page `fields` that
 *   do not simply repeat a top-level field.
 *
 * The input is never mutated; the result is a shallow copy with new page objects.
 *
 * @param {*} fields - Extraction fields. Anything that is not a plain
 *   (non-array) object is returned unchanged.
 * @param {string} [format="json"] - Target format; only "json" and "xml"
 *   trigger the page_N restructuring.
 * @returns {*} The prepared copy, or `fields` itself when it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  // Arrays are passed through too: spreading one into `{}` would silently
  // turn it into an index-keyed object.
  if (!fields || typeof fields !== "object" || Array.isArray(fields)) {
    return fields;
  }

  const output = { ...fields };

  // Remove full_text from the top level if a pages array exists (avoids duplication).
  if (Array.isArray(output.pages) && output.pages.length > 0) {
    delete output.full_text;
    // Clean up each page: full_text inside page.fields duplicates page.text.
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  // For JSON and XML: restructure pages into separate top-level entries.
  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
    // Keys already present at the root; a page field that merely repeats one
    // of these is dropped from the page entry.
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        if (key === "full_text") {
          continue;
        }
        // Compare by content so equal objects/arrays count as duplicates too.
        const repeatsTopLevel = topLevelKeys.has(key) && fieldValuesMatch(value, output[key]);
        if (!repeatsTopLevel) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other"
      };
      // Only add fields if there are page-specific ones left.
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }
      output[`page_${pageNum}`] = pageObj;
    });

    // The pages array is now represented by page_1, page_2, ... entries.
    delete output.pages;
  }

  return output;
}
/**
 * Escape a value for use as XML character data or an attribute value.
 *
 * Non-string input is coerced with `String()`; `null` and `undefined` become
 * the empty string instead of throwing. Characters that XML 1.0 does not allow
 * at all (C0 controls other than tab, LF and CR, plus U+FFFE/U+FFFF) are
 * removed, because no escaping can make them legal. The five predefined
 * entities are then substituted, `&` first so the ampersands introduced by the
 * other replacements are not escaped twice.
 *
 * @param {*} str - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  if (str === null || str === undefined) {
    return "";
  }
  return String(str)
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
// First character of an XML 1.0 Name (the ":" that the grammar also allows is
// deliberately left out so keys never look like namespace prefixes).
const XML_NAME_START_RE =
  /^[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}]/u;

// Any character that may NOT appear inside an XML 1.0 Name (":" excluded as above).
const XML_NAME_INVALID_CHAR_RE =
  /[^\-.0-9A-Z_a-z\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u037D\u037F-\u1FFF\u200C\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}]/gu;

/**
 * Turn an arbitrary object key into a well-formed XML element name.
 * Illegal characters become "_" and a "_" is prefixed when the first
 * character may not start a name (e.g. "Invoice Number" -> "Invoice_Number",
 * "1st" -> "_1st").
 *
 * @param {*} key - Raw key or root name.
 * @returns {string} A name usable inside `<...>`.
 */
function toXMLElementName(key) {
  const cleaned = String(key).replace(XML_NAME_INVALID_CHAR_RE, "_");
  return XML_NAME_START_RE.test(cleaned) ? cleaned : `_${cleaned}`;
}

/**
 * Serialize an extraction object as an indented XML document.
 *
 * The input is first passed through `prepareFieldsForOutput(obj, "xml")`.
 * Each property becomes an element named after its (sanitized) key; arrays
 * repeat the element once per item; nested objects recurse with two more
 * spaces of indentation; scalar text goes through `escapeXML`.
 * `null`/`undefined` property values and array items are omitted.
 *
 * @param {*} obj - Data to serialize. A non-object yields an empty root element.
 * @param {string} [rootName="extraction"] - Name of the root element.
 * @returns {string} The XML document, without a trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  const rootTag = toXMLElementName(rootName);
  const lines = [`<?xml version="1.0" encoding="UTF-8"?>`, `<${rootTag}>`];

  // Emit one element: a wrapper plus children for objects/arrays, inline text
  // for scalars (so no indentation whitespace leaks into the value).
  const emit = (tag, value, indent) => {
    if (value === null || value === undefined) {
      return;
    }
    if (typeof value === "object") {
      lines.push(`${indent}<${tag}>`);
      convert(value, `${indent}  `);
      lines.push(`${indent}</${tag}>`);
    } else {
      lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
    }
  };

  const convert = (node, indent = "  ") => {
    const hasPages = Array.isArray(node.pages) && node.pages.length > 0;
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) {
        continue;
      }
      // full_text duplicates the per-page text whenever pages are present.
      if (key === "full_text" && hasPages) {
        continue;
      }
      const tag = toXMLElementName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          emit(tag, item, indent);
        }
      } else {
        emit(tag, value, indent);
      }
    }
  };

  if (preparedObj !== null && typeof preparedObj === "object") {
    convert(preparedObj);
  }
  lines.push(`</${rootTag}>`);
  return lines.join("\n");
}
+export default function ExportButtons({ isComplete, extractionResult }) {
   const [downloading, setDownloading] = useState(null);
   const [copied, setCopied] = useState(false);
   const handleDownload = (format) => {
+    if (!extractionResult || !extractionResult.fields) {
+      console.error("No extraction data available");
+      return;
+    }
     setDownloading(format);
+    try {
+      const fields = extractionResult.fields;
+      let content = "";
+      let filename = "";
+      let mimeType = "";
+      if (format === "json") {
+        const preparedFields = prepareFieldsForOutput(fields, "json");
+        content = JSON.stringify(preparedFields, null, 2);
+        filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
+        mimeType = "application/json";
+      } else if (format === "xml") {
+        content = objectToXML(fields);
+        filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
+        mimeType = "application/xml";
+      }
+      // Create blob and download
+      const blob = new Blob([content], { type: mimeType });
+      const url = URL.createObjectURL(blob);
+      const link = document.createElement("a");
+      link.href = url;
+      link.download = filename;
+      document.body.appendChild(link);
+      link.click();
+      document.body.removeChild(link);
+      URL.revokeObjectURL(url);
+      setDownloading(null);
+    } catch (error) {
+      console.error("Download error:", error);
       setDownloading(null);
+    }
   };
   const handleCopyLink = () => {