JusteLeo committed on
Commit
7985a06
·
verified ·
1 Parent(s): aac8602

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +94 -94
README.md CHANGED
@@ -1,94 +1,94 @@
1
- ---
2
- license: apache-2.0
3
- language:
4
- - en
5
- base_model: JusteLeo/Qwen3-0.6B-T5-xxl
6
- tags:
7
- - split
8
- - encoder
9
- - embedding
10
- - Text Generation
11
- ---
12
-
13
- # Qwen3-0.6B-T5-xxl-split
14
-
15
- ## Model Description
16
-
17
- This repository provides the components of the `Qwen3-0.6B-T5-xxl` model, split into two parts. This is intended for advanced users who wish to perform custom operations, such as GGUF conversion or other model architecture modifications.
18
-
19
- Both components are provided in **float32** format to ensure maximum precision for downstream tasks like quantization.
20
-
21
- ## Repository Contents
22
-
23
- - **/qwen_body/**: Contains the fine-tuned `Qwen3-0.6B` model body. This is a standard Hugging Face model directory. The model weights are in `float32`.
24
- - **/projection_head/**: Contains the fine-tuned projection head as a single `projection_head.pth` file. This is a PyTorch state dictionary.
25
-
26
- ## How to Use
27
-
28
- To use these components, you need to load them separately and then combine them in a two-step inference process.
29
-
30
- ```python
31
- import torch
32
- from torch import nn
33
- from transformers import AutoTokenizer, AutoModel
34
- import numpy as np
35
-
36
- # --- 1. Load Components ---
37
- device = "cuda"
38
-
39
- # Load the model body
40
- body_model = AutoModel.from_pretrained("./qwen_body").to(device)
41
- tokenizer = AutoTokenizer.from_pretrained("./qwen_body")
42
-
43
- # Load the projection head
44
- # First, re-create the architecture
45
- input_dim = body_model.config.hidden_size # 1024
46
- hidden_dim = 2048
47
- output_dim = 4096
48
- head_model = nn.Sequential(
49
- nn.Linear(input_dim, hidden_dim),
50
- nn.GELU(),
51
- nn.Dropout(0.1),
52
- nn.Linear(hidden_dim, output_dim)
53
- ).to(device)
54
- # Then, load the saved weights
55
- head_model.load_state_dict(torch.load("./projection_head/projection_head.pth"))
56
-
57
- body_model.eval()
58
- head_model.eval()
59
-
60
- # --- 2. Create a unified inference function ---
61
- def get_final_embedding(text: str):
62
- # a) Tokenize the input text
63
- inputs = tokenizer(text, return_tensors="pt").to(device)
64
-
65
- # b) Get the base embedding from the body model
66
- with torch.no_grad():
67
- outputs_body = body_model(**inputs)
68
- last_hidden_state = outputs_body.last_hidden_state
69
-
70
- # c) Perform mean pooling
71
- attention_mask = inputs['attention_mask']
72
- mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
73
- sum_embeddings = torch.sum(last_hidden_state * mask_expanded, 1)
74
- sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9)
75
- pooled_embedding = sum_embeddings / sum_mask
76
-
77
- # d) Pass the pooled embedding through the projection head
78
- with torch.no_grad():
79
- final_embedding = head_model(pooled_embedding)
80
-
81
- return final_embedding
82
-
83
- # --- 3. Test the pipeline ---
84
- prompt = "A high-tech laboratory with glowing vials and holographic displays."
85
- embedding = get_final_embedding(prompt)
86
-
87
- print("Inference successful!")
88
- print(f"Output shape: {embedding.shape}")
89
- # Expected output shape: (1, 4096)
90
- ```
91
-
92
- ## License
93
-
94
- This repository is licensed under the **MIT License**.
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ base_model: JusteLeo/Qwen3-0.6B-T5-xxl
6
+ tags:
7
+ - split
8
+ - encoder
9
+ - embedding
10
+ - Text Generation
11
+ ---
12
+
13
+ # Qwen3-0.6B-T5-xxl-split
14
+
15
+ ## Model Description
16
+
17
+ This repository provides the components of the `Qwen3-0.6B-T5-xxl` model, split into two parts. This is intended for advanced users who wish to perform custom operations, such as GGUF conversion or other model architecture modifications.
18
+
19
+ Both components are provided in **float32** format to ensure maximum precision for downstream tasks like quantization.
20
+
21
+ ## Repository Contents
22
+
23
+ - **/qwen_body/**: Contains the fine-tuned `Qwen3-0.6B` model body. This is a standard Hugging Face model directory. The model weights are in `float32`.
24
+ - **/projection_head/**: Contains the fine-tuned projection head as a single `projection_head.pth` file. This is a PyTorch state dictionary.
25
+
26
+ ## How to Use
27
+
28
+ To use these components, you need to load them separately and then combine them in a two-step inference process.
29
+
30
+ ```python
31
+ import torch
32
+ from torch import nn
33
+ from transformers import AutoTokenizer, AutoModel
34
+ import numpy as np
35
+
36
+ # --- 1. Load Components ---
37
+ device = "cuda"
38
+
39
+ # Load the model body
40
+ body_model = AutoModel.from_pretrained("./qwen_body").to(device)
41
+ tokenizer = AutoTokenizer.from_pretrained("./qwen_body")
42
+
43
+ # Load the projection head
44
+ # First, re-create the architecture
45
+ input_dim = body_model.config.hidden_size # 1024
46
+ hidden_dim = 2048
47
+ output_dim = 4096
48
+ head_model = nn.Sequential(
49
+ nn.Linear(input_dim, hidden_dim),
50
+ nn.GELU(),
51
+ nn.Dropout(0.1),
52
+ nn.Linear(hidden_dim, output_dim)
53
+ ).to(device)
54
+ # Then, load the saved weights
55
+ head_model.load_state_dict(torch.load("./projection_head/projection_head.pth"))
56
+
57
+ body_model.eval()
58
+ head_model.eval()
59
+
60
+ # --- 2. Create a unified inference function ---
61
+ def get_final_embedding(text: str):
62
+ # a) Tokenize the input text
63
+ inputs = tokenizer(text, return_tensors="pt").to(device)
64
+
65
+ # b) Get the base embedding from the body model
66
+ with torch.no_grad():
67
+ outputs_body = body_model(**inputs)
68
+ last_hidden_state = outputs_body.last_hidden_state
69
+
70
+ # c) Perform mean pooling
71
+ attention_mask = inputs['attention_mask']
72
+ mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
73
+ sum_embeddings = torch.sum(last_hidden_state * mask_expanded, 1)
74
+ sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9)
75
+ pooled_embedding = sum_embeddings / sum_mask
76
+
77
+ # d) Pass the pooled embedding through the projection head
78
+ with torch.no_grad():
79
+ final_embedding = head_model(pooled_embedding)
80
+
81
+ return final_embedding
82
+
83
+ # --- 3. Test the pipeline ---
84
+ prompt = "A high-tech laboratory with glowing vials and holographic displays."
85
+ embedding = get_final_embedding(prompt)
86
+
87
+ print("Inference successful!")
88
+ print(f"Output shape: {embedding.shape}")
89
+ # Expected output shape: (1, 4096)
90
+ ```
91
+
92
+ ## License
93
+
94
+ This repository is licensed under the **Apache License 2.0**.