akhaliq HF Staff committed on
Commit
3bbc203
·
1 Parent(s): 5d7cc0b

more fixes

Browse files
Files changed (1) hide show
  1. backend_deploy.py +94 -45
backend_deploy.py CHANGED
@@ -37,55 +37,87 @@ def parse_html_code(code: str) -> str:
37
 
38
 
39
  def parse_transformers_js_output(code: str) -> Dict[str, str]:
40
- """Parse transformers.js output into separate files"""
41
- files = {}
42
 
43
- # First try: Pattern to match === filename === sections
44
- pattern = r'===\s*(\S+\.(?:html|js|css))\s*===\s*(.*?)(?====|$)'
45
- matches = re.finditer(pattern, code, re.DOTALL | re.IGNORECASE)
 
 
 
 
46
 
47
- for match in matches:
48
- filename = match.group(1).strip()
49
- content = match.group(2).strip()
50
-
51
- # Clean up code blocks if present
52
- content = re.sub(r'^```\w*\s*', '', content, flags=re.MULTILINE)
53
- content = re.sub(r'```\s*$', '', content, flags=re.MULTILINE)
54
-
55
- files[filename] = content
56
 
57
- # Fallback: Try to extract from markdown code blocks if === format not found
58
- if not files:
59
- print("[Deploy] === format not found, trying markdown code blocks fallback")
60
-
61
- # Try to find ```html, ```javascript, ```css blocks
62
- html_match = re.search(r'```html\s*(.*?)```', code, re.DOTALL | re.IGNORECASE)
63
- js_match = re.search(r'```javascript\s*(.*?)```', code, re.DOTALL | re.IGNORECASE)
64
- css_match = re.search(r'```css\s*(.*?)```', code, re.DOTALL | re.IGNORECASE)
65
-
 
 
 
 
 
66
  if html_match:
67
- content = html_match.group(1).strip()
68
- # Remove comment lines like "<!-- index.html content here -->"
69
- content = re.sub(r'<!--\s*index\.html.*?-->\s*', '', content, flags=re.IGNORECASE)
70
- files['index.html'] = content
71
-
 
72
  if js_match:
73
- content = js_match.group(1).strip()
74
- # Remove comment lines like "// index.js content here"
75
- content = re.sub(r'//\s*index\.js.*?\n', '', content, flags=re.IGNORECASE)
76
- files['index.js'] = content
77
-
 
78
  if css_match:
79
- content = css_match.group(1).strip()
80
- # Remove comment lines like "/* style.css content here */"
81
- content = re.sub(r'/\*\s*style\.css.*?\*/', '', content, flags=re.IGNORECASE)
82
- files['style.css'] = content
83
 
84
- # Last resort: try to extract as single HTML file
85
- if not files:
86
- html_content = parse_html_code(code)
87
- if html_content:
88
- files['index.html'] = html_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  return files
91
 
@@ -362,6 +394,13 @@ def deploy_to_huggingface_space(
362
  files = parse_transformers_js_output(code)
363
  print(f"[Deploy] Parsed transformers.js files: {list(files.keys())}")
364
 
 
 
 
 
 
 
 
365
  # Validate all three files are present
366
  missing_files = []
367
  if not files.get('index.html'):
@@ -385,10 +424,14 @@ def deploy_to_huggingface_space(
385
  print(f"[Deploy] {error_msg}")
386
  return False, error_msg, None
387
 
388
- # Write transformers.js files
389
  for filename, content in files.items():
390
- print(f"[Deploy] Writing {filename} ({len(content)} chars)")
391
- (temp_path / filename).write_text(content, encoding='utf-8')
 
 
 
 
392
 
393
  # For transformers.js, we'll upload files individually (not via upload_folder)
394
  use_individual_uploads = True
@@ -594,12 +637,18 @@ def deploy_to_huggingface_space(
594
  temp_file_path = None
595
  try:
596
  # Create a NEW temp file for this upload (key difference from old approach)
 
597
  with tempfile.NamedTemporaryFile("w", suffix=f".{file_name.split('.')[-1]}", delete=False, encoding='utf-8') as f:
598
  f.write(file_content)
599
  f.flush() # Ensure all content is written to disk before closing
600
  temp_file_path = f.name
601
  # File is now closed and flushed, safe to upload
602
 
 
 
 
 
 
603
  # Upload the file without commit_message (HF handles this for spaces)
604
  api.upload_file(
605
  path_or_fileobj=temp_file_path,
 
37
 
38
 
39
  def parse_transformers_js_output(code: str) -> Dict[str, str]:
40
+ """Parse transformers.js output into separate files (index.html, index.js, style.css)
 
41
 
42
+ Uses comprehensive parsing patterns to handle various LLM output formats.
43
+ """
44
+ files = {
45
+ 'index.html': '',
46
+ 'index.js': '',
47
+ 'style.css': ''
48
+ }
49
 
50
+ # Multiple patterns to match the three code blocks with different variations
51
+ html_patterns = [
52
+ r'```html\s*\n([\s\S]*?)(?:```|\Z)',
53
+ r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
54
+ r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
55
+ ]
 
 
 
56
 
57
+ js_patterns = [
58
+ r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
59
+ r'```js\s*\n([\s\S]*?)(?:```|\Z)',
60
+ r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
61
+ ]
62
+
63
+ css_patterns = [
64
+ r'```css\s*\n([\s\S]*?)(?:```|\Z)',
65
+ r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
66
+ ]
67
+
68
+ # Extract HTML content
69
+ for pattern in html_patterns:
70
+ html_match = re.search(pattern, code, re.IGNORECASE)
71
  if html_match:
72
+ files['index.html'] = html_match.group(1).strip()
73
+ break
74
+
75
+ # Extract JavaScript content
76
+ for pattern in js_patterns:
77
+ js_match = re.search(pattern, code, re.IGNORECASE)
78
  if js_match:
79
+ files['index.js'] = js_match.group(1).strip()
80
+ break
81
+
82
+ # Extract CSS content
83
+ for pattern in css_patterns:
84
+ css_match = re.search(pattern, code, re.IGNORECASE)
85
  if css_match:
86
+ files['style.css'] = css_match.group(1).strip()
87
+ break
 
 
88
 
89
+ # Fallback: support === index.html === format if any file is missing
90
+ if not (files['index.html'] and files['index.js'] and files['style.css']):
91
+ # Use regex to extract sections
92
+ html_fallback = re.search(r'===\s*index\.html\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
93
+ js_fallback = re.search(r'===\s*index\.js\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
94
+ css_fallback = re.search(r'===\s*style\.css\s*===\s*\n([\s\S]+?)(?=\n===|$)', code, re.IGNORECASE)
95
+
96
+ if html_fallback:
97
+ files['index.html'] = html_fallback.group(1).strip()
98
+ if js_fallback:
99
+ files['index.js'] = js_fallback.group(1).strip()
100
+ if css_fallback:
101
+ files['style.css'] = css_fallback.group(1).strip()
102
+
103
+ # Additional fallback: extract from numbered sections or file headers
104
+ if not (files['index.html'] and files['index.js'] and files['style.css']):
105
+ # Try patterns like "1. index.html:" or "**index.html**"
106
+ patterns = [
107
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.html(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.html'),
108
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.js(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.js'),
109
+ (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)style\.css(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'style.css')
110
+ ]
111
+
112
+ for pattern, file_key in patterns:
113
+ if not files[file_key]:
114
+ match = re.search(pattern, code, re.IGNORECASE | re.MULTILINE)
115
+ if match:
116
+ # Clean up the content by removing any code block markers
117
+ content = match.group(1).strip()
118
+ content = re.sub(r'^```\w*\s*\n', '', content)
119
+ content = re.sub(r'\n```\s*$', '', content)
120
+ files[file_key] = content.strip()
121
 
122
  return files
123
 
 
394
  files = parse_transformers_js_output(code)
395
  print(f"[Deploy] Parsed transformers.js files: {list(files.keys())}")
396
 
397
+ # Log file sizes for debugging
398
+ for fname, fcontent in files.items():
399
+ if fcontent:
400
+ print(f"[Deploy] {fname}: {len(fcontent)} characters")
401
+ else:
402
+ print(f"[Deploy] {fname}: EMPTY")
403
+
404
  # Validate all three files are present
405
  missing_files = []
406
  if not files.get('index.html'):
 
424
  print(f"[Deploy] {error_msg}")
425
  return False, error_msg, None
426
 
427
+ # Write transformers.js files to temp directory
428
  for filename, content in files.items():
429
+ file_path = temp_path / filename
430
+ print(f"[Deploy] Writing {filename} ({len(content)} chars) to {file_path}")
431
+ file_path.write_text(content, encoding='utf-8')
432
+ # Verify the write was successful
433
+ written_size = file_path.stat().st_size
434
+ print(f"[Deploy] Verified {filename}: {written_size} bytes on disk")
435
 
436
  # For transformers.js, we'll upload files individually (not via upload_folder)
437
  use_individual_uploads = True
 
637
  temp_file_path = None
638
  try:
639
  # Create a NEW temp file for this upload (key difference from old approach)
640
+ print(f"[Deploy] Creating temp file for {file_name} with {len(file_content)} chars")
641
  with tempfile.NamedTemporaryFile("w", suffix=f".{file_name.split('.')[-1]}", delete=False, encoding='utf-8') as f:
642
  f.write(file_content)
643
  f.flush() # Ensure all content is written to disk before closing
644
  temp_file_path = f.name
645
  # File is now closed and flushed, safe to upload
646
 
647
+ # Verify temp file size before upload
648
+ import os as _os
649
+ temp_size = _os.path.getsize(temp_file_path)
650
+ print(f"[Deploy] Temp file {file_name} size on disk: {temp_size} bytes (expected ~{len(file_content)} chars)")
651
+
652
  # Upload the file without commit_message (HF handles this for spaces)
653
  api.upload_file(
654
  path_or_fileobj=temp_file_path,