phongnp2010
/

chatbot-llama-7b-chathf

@@ -1,90 +1,76 @@
-# 1.Import Necessary libraries
-import os
-import torch
-import time
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    HfArgumentParser,
-    TrainingArguments,
-    pipeline,
-    logging,
-    )
-from peft import LoraConfig, PeftModel
-import torch.distributed as dist
-from torch.nn.parallel import DistributedDataParallel as DDP
-from accelerate import Accelerator
-import config_train as cfg
-accelerator = Accelerator()
-# 2. Model and Dataset Configuration
-model_name = cfg.model_name
-# new_model = "Llama-2-7b-chat-finetune-qlora"
-# new_model = "/mnt/md1/check_point_text_recognition/ckpt_chatbot/checkpoint-53390"
-new_model = "/mnt/md1/check_point_text_recognition/ckpt_chatbot/241202/checkpoint-2700"
-# device_map = {"":0}
-# 3. Tokenizer and PEFT configuration
-#Load LLama tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code = True)
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = "right"
-# 4. load model for inference
-'''
-Since the model is loaded in full precision (float32), it requires more memory.
-For large models like LLaMA-2 7B, this can consume significant GPU memory.
-'''
-# Step 1: Load the base model
-# base_model = AutoModelForCausalLM.from_pretrained(
-#     model_name,  # The original base model's name or path
-#     device_map=device_map,  # Or specify your device
-# )
-'''
-Mixed Precision: FP16 uses 16-bit floating point numbers, which reduces the memory usage and
-allows the model to fit into GPU memory more easily. However, this could potentially reduce
-numerical accuracy slightly, but in most NLP tasks, the difference is negligible.
-'''
-base_model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    low_cpu_mem_usage=True,
-    return_dict=True,
-    torch_dtype=torch.float16,
-    device_map=cfg.device_map,
-)
-# Step 2: Load the fine-tuned LoRA model (saved from trainer.model.save_pretrained)
-model = PeftModel.from_pretrained(base_model, new_model)  # `new_model` is the path where you saved the model
-# Step 3: Merge the LoRA weights with the base model
-model = model.merge_and_unload()
-model, tokenizer = accelerator.prepare(model, tokenizer) #Wrap model and tokenizer with Accelerator
-# Ignore warnings
-logging.set_verbosity(logging.CRITICAL)
-# 5. Run text generation pipeline with our next model
-# prompt = "How can I learn to optimize my webpage for search engines?"
-prompt_path = "/mnt/md1/check_point_text_recognition/ckpt_chatbot/prompt_for_test.txt"
-prompt = '''
-How to train a LLM model
-'''
-pipe = pipeline(task="text-generation", model=base_model, tokenizer=tokenizer, max_length=2048)
-while True:
-    prompt = input("Type your question: ")
-    if prompt != '0':
-        with open(prompt_path, 'r') as file:
-            text = file.read().strip()
-        start = time.time()
-        result = pipe(f"<s>[INST] {text} [/INST]")
-        result = result[0]['generated_text']
-        answer = result.split('[/INST]')[1].split('</s>')[0].strip()
-        print('Answer:', answer)
-        print('time:', time.time() - start)
-    else:
-        print('Xin cảm ơn!')
-        exit(0)

+**2. Tạo Model Card trên Hugging Face Hub**
+Sau khi bạn soạn thảo xong Model Card, bạn có thể thêm nó vào repo của mình như sau:
+**2.1 Thêm vào File `README.md`**
+Để cung cấp thông tin này cho người dùng, bạn chỉ cần chỉnh sửa file `README.md` trong repo của mình và thêm nội dung Markdown trên vào. Sau khi bạn upload mô hình lên Hugging Face, file `README.md` sẽ được hiển thị ở trang repo của bạn.
+**2.2 Các Tính Năng hỗ trợ Copy Code**
+Hugging Face sẽ tự động nhận diện các đoạn code được viết trong thẻ Markdown ```` ``` ```` và sẽ thêm nút **Copy** phía trên các ô code. Bạn chỉ cần bao bọc mã nguồn trong thẻ ```` ```python ``` ```` hoặc tương tự.
+**Ví dụ:**
+```python
+# Code in markdown file
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained("your-username/my-lora-model")
+tokenizer = AutoTokenizer.from_pretrained("your-username/my-lora-model")
+inputs = tokenizer("Hello, how are you?", return_tensors="pt")
+outputs = model.generate(**inputs)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+Khi người dùng vào trang repo của bạn trên Hugging Face, họ sẽ thấy một ô code có nút **Copy** ở phía trên. Mỗi khi người dùng nhấn vào nút Copy, mã sẽ được sao chép vào clipboard.
+**3. Cải thiện Giao Diện và Tương Tác**
+Để đảm bảo trang mô hình của bạn dễ sử dụng, bạn có thể làm thêm một số điều sau:
+- **Cung cấp nhiều ví dụ**: Bạn có thể thêm các ví dụ khác để người dùng dễ hiểu hơn về cách sử dụng mô hình của bạn.
+- **Mô tả chi tiết hơn**: Bao gồm chi tiết về loại dữ liệu mà mô hình được fine-tune trên đó, các hạn chế của mô hình, và các khả năng đặc biệt.
+**Ví dụ Model Card hoàn chỉnh**
+````markdown
+# My LoRA Model
+This is a fine-tuned LoRA model based on [Base Model Name].
+## Model Description
+This model is fine-tuned using LoRA (Low-Rank Adaptation) on top of a pre-trained large language model. It is designed to perform text generation tasks efficiently with reduced memory footprint compared to full fine-tuning.
+## Training Details
+- Base Model: [Base Model Name]
+- Fine-tuning Method: LoRA
+- Fine-tuning Data: [Dataset Name or Description]
+- Intended Use: Text Generation, Conversational AI, etc.
+## How to use
+You can use this model directly with the `transformers` library:
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Load model and tokenizer
+model = AutoModelForCausalLM.from_pretrained("your-username/my-lora-model")
+tokenizer = AutoTokenizer.from_pretrained("your-username/my-lora-model")
+# Generate text
+inputs = tokenizer("Hello, how are you?", return_tensors="pt")
+outputs = model.generate(**inputs)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+````
+---
+1. **Ô Text**: Dùng để mô tả mô hình, cách thức fine-tune, và các thông tin bổ sung.
+2. **Ô Code**: Sử dụng cú pháp Markdown để hiển thị ví dụ code. Hugging Face tự động hiển thị nút **Copy** trên các ô code.
+3. **Copy Code**: Nút copy code sẽ tự động xuất hiện khi bạn sử dụng cú pháp Markdown chuẩn để trình bày các đoạn mã.
+Khi bạn hoàn thành việc soạn thảo Model Card, hãy upload nó lên Hugging Face và kiểm tra xem các tính năng có hiển thị như mong đợi hay không!