Seth0330 committed on
Commit
fafa006
·
verified ·
1 Parent(s): b90a9db

Update frontend/src/components/ExportButtons.jsx

Browse files
frontend/src/components/ExportButtons.jsx CHANGED
@@ -21,16 +21,165 @@ import {
21
  } from "@/components/ui/dropdown-menu";
22
  import { cn } from "@/lib/utils";
23
 
24
- export default function ExportButtons({ isComplete }) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  const [downloading, setDownloading] = useState(null);
26
  const [copied, setCopied] = useState(false);
27
 
28
  const handleDownload = (format) => {
 
 
 
 
 
29
  setDownloading(format);
30
- // Simulate download
31
- setTimeout(() => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  setDownloading(null);
33
- }, 1500);
34
  };
35
 
36
  const handleCopyLink = () => {
 
21
  } from "@/components/ui/dropdown-menu";
22
  import { cn } from "@/lib/utils";
23
 
24
+ // Helper functions from ExtractionOutput
25
/**
 * Build an export-ready shallow copy of an extraction `fields` object.
 *
 * - When `pages` is a non-empty array, the top-level `full_text` is dropped
 *   and `full_text` is removed from every page's `fields`.
 * - For the "json" and "xml" formats the `pages` array is replaced by one
 *   top-level `page_<n>` entry per page. Each entry holds `text`,
 *   `confidence`, `doc_type` and, only if any remain, the page fields that
 *   are not identical to the top-level field of the same name.
 *
 * The input object and its page objects are copied, never mutated.
 *
 * @param {*} fields - Extraction fields. Falsy or non-object values are
 *   returned unchanged.
 * @param {string} [format="json"] - Target format; only "json" and "xml"
 *   trigger the page flattening.
 * @returns {*} The prepared copy, or `fields` itself when it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };
  const hasPagesArray = Array.isArray(output.pages);

  if (hasPagesArray && output.pages.length > 0) {
    // The per-page text already carries the content, so the merged copy is redundant.
    delete output.full_text;

    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  if ((format === "json" || format === "xml") && hasPagesArray) {
    // Snapshot the root keys before any page_<n> entries are added.
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      // Keep only page fields that add information beyond the root object.
      // NOTE: the comparison is by identity (!==), so object/array values are
      // only treated as duplicates when they are the very same reference.
      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        const duplicatesRoot = topLevelKeys.has(key) && value === output[key];
        if (key !== "full_text" && !duplicatesRoot) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      // Two pages can resolve to the same number (duplicate or missing
      // page_number), and a root field may already be called page_<n>.
      // Pick a free key instead of silently overwriting earlier data.
      let pageKey = `page_${pageNum}`;
      for (
        let suffix = 2;
        Object.prototype.hasOwnProperty.call(output, pageKey);
        suffix += 1
      ) {
        pageKey = `page_${pageNum}_${suffix}`;
      }

      output[pageKey] = pageObj;
    });

    // The page_<n> entries replace the array.
    delete output.pages;
  }

  return output;
}
88
+
89
/**
 * Escape the five XML special characters so a value can be placed safely in
 * element content or an attribute value.
 *
 * The ampersand is replaced first; otherwise the `&` introduced by the other
 * entities would be escaped a second time.
 *
 * @param {*} str - Value to escape; non-strings are converted with String().
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
97
+
98
/**
 * Serialize an extraction fields object as an XML document string.
 *
 * The object is first passed through prepareFieldsForOutput(obj, "xml").
 * Each own enumerable property then becomes an element: arrays emit one
 * element per item, nested objects recurse, and scalars are escaped with
 * escapeXML. Properties and array items that are null or undefined are
 * omitted.
 *
 * @param {*} obj - Fields object to serialize.
 * @param {string} [rootName="extraction"] - Name of the document element.
 * @returns {string} XML text starting with the XML declaration.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");

  // Field names come from extracted data and may contain spaces or start
  // with a digit, which would yield malformed tags. This approximates the
  // XML Name rules: disallowed characters become "_", and a name that does
  // not start with a letter or "_" gets a "_" prefix.
  const toTagName = (rawName) => {
    const cleaned = String(rawName).replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const rootTag = toTagName(rootName);
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootTag}>\n`;

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;

      // Skip full_text on any node that also carries a non-empty pages array.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toTagName(key);

      if (Array.isArray(value)) {
        value.forEach((item) => {
          // typeof null === "object", so null must be filtered out before the
          // recursion; skipping matches how null properties are handled.
          if (item === null || item === undefined) return;

          xml += `${indent}<${tag}>\n`;
          if (typeof item === "object") {
            convert(item, indent + " ");
          } else {
            xml += `${indent} ${escapeXML(String(item))}\n`;
          }
          xml += `${indent}</${tag}>\n`;
        });
      } else if (typeof value === "object") {
        xml += `${indent}<${tag}>\n`;
        convert(value, indent + " ");
        xml += `${indent}</${tag}>\n`;
      } else {
        xml += `${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`;
      }
    }
  };

  // A null/undefined root yields an empty document element instead of a TypeError.
  if (preparedObj !== null && preparedObj !== undefined) {
    convert(preparedObj);
  }

  xml += `</${rootTag}>`;
  return xml;
}
137
+
138
+ export default function ExportButtons({ isComplete, extractionResult }) {
139
  const [downloading, setDownloading] = useState(null);
140
  const [copied, setCopied] = useState(false);
141
 
142
  const handleDownload = (format) => {
143
+ if (!extractionResult || !extractionResult.fields) {
144
+ console.error("No extraction data available");
145
+ return;
146
+ }
147
+
148
  setDownloading(format);
149
+
150
+ try {
151
+ const fields = extractionResult.fields;
152
+ let content = "";
153
+ let filename = "";
154
+ let mimeType = "";
155
+
156
+ if (format === "json") {
157
+ const preparedFields = prepareFieldsForOutput(fields, "json");
158
+ content = JSON.stringify(preparedFields, null, 2);
159
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.json`;
160
+ mimeType = "application/json";
161
+ } else if (format === "xml") {
162
+ content = objectToXML(fields);
163
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
164
+ mimeType = "application/xml";
165
+ }
166
+
167
+ // Create blob and download
168
+ const blob = new Blob([content], { type: mimeType });
169
+ const url = URL.createObjectURL(blob);
170
+ const link = document.createElement("a");
171
+ link.href = url;
172
+ link.download = filename;
173
+ document.body.appendChild(link);
174
+ link.click();
175
+ document.body.removeChild(link);
176
+ URL.revokeObjectURL(url);
177
+
178
+ setDownloading(null);
179
+ } catch (error) {
180
+ console.error("Download error:", error);
181
  setDownloading(null);
182
+ }
183
  };
184
 
185
  const handleCopyLink = () => {