rootxhacker committed
Commit dd39b51 · verified · Parent(s): 2d8cff9

Update app.py

Files changed (1):
  1. app.py +65 -17
app.py CHANGED
@@ -513,6 +513,13 @@ def load_model():
     if tokenizer is not None and model is not None:
         return tokenizer, model, device
 
+    # Get HF token from environment
+    hf_token = os.getenv("HF_TOKEN")
+    if hf_token:
+        print("🔑 HF_TOKEN found - using authenticated access")
+    else:
+        print("⚠️ No HF_TOKEN found - using public access only")
+
     try:
         # This appears to be a LoRA adapter
         adapter_path = "rootxhacker/llama-3B-diffusion-exp-fixed"
@@ -520,19 +527,24 @@ def load_model():
 
         print(f"Loading AR-Diffusion model on {device}...")
 
-        # Load tokenizer from adapter
-        tokenizer = AutoTokenizer.from_pretrained(adapter_path, trust_remote_code=True)
+        # Load tokenizer from adapter with token
+        tokenizer = AutoTokenizer.from_pretrained(
+            adapter_path,
+            trust_remote_code=True,
+            token=hf_token
+        )
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
-        # Load the adapter model
+        # Load the adapter model with token
        print("Loading adapter model...")
         model = AutoModelForCausalLM.from_pretrained(
             adapter_path,
             torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
             device_map="auto" if device.type == "cuda" else None,
             trust_remote_code=True,
-            low_cpu_mem_usage=True
+            low_cpu_mem_usage=True,
+            token=hf_token
         )
 
         print("✅ AR-Diffusion model loaded successfully!")
@@ -541,24 +553,56 @@ def load_model():
     except Exception as e:
         print(f"❌ Error loading {adapter_path}: {e}")
 
-        # Fallback to a working model for demonstration
-        print("🔄 Falling back to demonstration model...")
+        # Try alternative working models for AR-Diffusion demo
+        print("🔄 Trying alternative models...")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        fallback_model = "gpt2-medium"
 
-        tokenizer = AutoTokenizer.from_pretrained(fallback_model)
+        # Try different models in order of preference
+        alternative_models = [
+            "microsoft/DialoGPT-medium",
+            "gpt2-large",
+            "gpt2-medium",
+            "distilgpt2"
+        ]
+
+        for alt_model in alternative_models:
+            try:
+                print(f"Trying {alt_model}...")
+                tokenizer = AutoTokenizer.from_pretrained(alt_model, token=hf_token)
+                if tokenizer.pad_token is None:
+                    tokenizer.pad_token = tokenizer.eos_token
+
+                model = AutoModelForCausalLM.from_pretrained(
+                    alt_model,
+                    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
+                    device_map="auto" if device.type == "cuda" else None,
+                    low_cpu_mem_usage=True,
+                    token=hf_token
+                )
+
+                print(f"✅ Alternative model {alt_model} loaded successfully!")
+                print("⚠️ Note: Using alternative model - AR-Diffusion features adapted for demo")
+                return tokenizer, model, device
+
+            except Exception as alt_e:
+                print(f"❌ {alt_model} failed: {alt_e}")
+                continue
+
+        # Final fallback
+        print("🔄 Using final fallback model...")
+        tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
         model = AutoModelForCausalLM.from_pretrained(
-            fallback_model,
+            "distilgpt2",
             torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
             device_map="auto" if device.type == "cuda" else None,
             low_cpu_mem_usage=True
         )
 
-        print(f"✅ Fallback model {fallback_model} loaded successfully!")
-        print("⚠️ Note: Using fallback model - AR-Diffusion features may not work as expected")
+        print("✅ Final fallback model loaded successfully!")
+        print("⚠️ Note: Using basic model - AR-Diffusion features adapted for demo")
         return tokenizer, model, device
 
 def cleanup_memory():
@@ -604,8 +648,9 @@ def chat_function(message, history, mode, progress=gr.Progress()):
         - **Words/Second:** {stats['words_per_second']:.1f}
         - **Steps:** {stats['steps']}"""
 
-        # Update history
-        history.append([message, response])
+        # Update history with proper message format
+        history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": response})
 
         # Cleanup memory for Zero GPU efficiency
         cleanup_memory()
@@ -614,7 +659,8 @@ def chat_function(message, history, mode, progress=gr.Progress()):
 
     except Exception as e:
         error_msg = f"Error: {str(e)}"
-        history.append([message, error_msg])
+        history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": error_msg})
         cleanup_memory()
         return history, "", f"**❌ Error occurred during generation**"
 
@@ -646,6 +692,7 @@ def create_interface():
         <p>This is an experimental AR-Diffusion model. Results may vary and the model is still under development.</p>
         <p><em>🔥 Powered by Zero GPU with @spaces.GPU</em></p>
         <p><small>Model: rootxhacker/llama-3B-diffusion-exp-fixed (LoRA Adapter)</small></p>
+        <p><small>🔑 Requires HF_TOKEN for gated model access</small></p>
     </div>
     """)
 
@@ -654,9 +701,9 @@ def create_interface():
         chatbot = gr.Chatbot(
             [],
             elem_id="chatbot",
-            bubble_full_width=False,
             height=500,
-            show_label=False
+            show_label=False,
+            type="messages"
         )
 
         with gr.Row():
@@ -698,7 +745,8 @@ def create_interface():
         <p>This experimental model uses autoregressive diffusion for text generation, creating responses by iteratively denoising masked tokens.</p>
         <br>
         <p><strong>Model:</strong> LoRA adapter trained for AR-Diffusion</p>
-        <p><strong>Note:</strong> This model is experimental and may produce unexpected results. If the specific model fails to load, a fallback model will be used for demonstration.</p>
+        <p><strong>Authentication:</strong> Requires HF_TOKEN for gated Llama model access</p>
+        <p><strong>Note:</strong> This model is experimental and may produce unexpected results. If the specific model fails to load, alternative models will be used for demonstration.</p>
     </div>
     """)
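
Background on the chat-history change above: with type="messages", Gradio's Chatbot expects the history as a list of {"role": ..., "content": ...} dicts rather than [user, assistant] pairs, which is why chat_function now appends two role-tagged entries per turn. A minimal, self-contained sketch of that pairing (the handler and variable names here are illustrative, not taken from app.py; the Chatbot arguments mirror the diff):

import gradio as gr

def respond(message, history):
    # With type="messages", history is a list of {"role", "content"} dicts
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": f"Echo: {message}"})
    # Return updated history and an empty string to clear the textbox
    return history, ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([], height=500, show_label=False, type="messages")
    msg = gr.Textbox(show_label=False)
    msg.submit(respond, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    demo.launch()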