runningSnail committed
Commit 6a83d63
Parent(s): abd40c7
code clean
README.md
CHANGED
@@ -42,18 +42,22 @@ Dolphin employs a decoder-decoder framework with two main components:
 ![Model Architecture](modelstructure.jpg)
 
 ## Running the Model
-Method 1
+### Method 1
+Download this repository and run the following commands:
 ```bash
 git lfs install
 git clone https://huggingface.co/NexaAIDev/Dolphin
 python inference_example.py
 ```
 
-Method 2
+### Method 2
+Install the `dolphin` package:
 ```
 pip install nexaai-dolphin
 ```
+
 Then run the following commands:
+
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 import torch
@@ -75,14 +79,12 @@ def inference_instruct(mycontext, question, device="cuda:0"):
         .unsqueeze(0)
         .to(device)
     )
-    # to process the context
     context_tokenized = tokenizer(
         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
         return_tensors="pt",
     )
     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
     context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
-    # We conduct a inference process
     for i in range(context_token_count):
         next_token = (
             model(
@@ -106,14 +108,12 @@ if __name__ == "__main__":
     device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
     AutoConfig.register("dolphin", DolphinConfig)
     AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
-    # Load the tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin')
     model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_name)
 
     # Run inference example
     mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
     question = "Who founded Nexa AI?"
-    # Pass the context and the correct device string
     result = inference_instruct(mycontext, question, device=device_name)
     print("Result:", result)
 ```
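A note on the context-tokenization line in the second hunk: it appends `MEMORY_SIZE` placeholder tokens after the raw context, presumably slots the model fills with a compressed representation of that context. A minimal sketch of just the string construction, with an illustrative `MEMORY_SIZE` of 3 and a shortened stand-in context (the real values come from the Dolphin config and your own input):

```python
# Illustrative only: MEMORY_SIZE and mycontext are stand-ins.
MEMORY_SIZE = 3
mycontext = "Nexa AI is a Cupertino-based company founded in May 2023."

prompt = mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)])
print(prompt)
# Nexa AI is a Cupertino-based company founded in May 2023.[memory_0][memory_1][memory_2]
```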
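The second hunk also shows only the opening of the token-by-token loop (`for i in range(context_token_count): next_token = (model(...`). For readers unfamiliar with the pattern, here is a minimal, self-contained greedy-decoding sketch: run the model, pick the most likely next token, append it, and repeat. It uses `gpt2` purely as a stand-in model and omits Dolphin's memory-token handling, so it illustrates the loop shape, not Dolphin's actual method:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in setup: gpt2 is used only to demonstrate the loop shape.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device).eval()

input_ids = tokenizer("Who founded Nexa AI?", return_tensors="pt").input_ids.to(device)

with torch.no_grad():
    for _ in range(20):  # cap the number of generated tokens
        logits = model(input_ids).logits                             # (1, seq_len, vocab_size)
        next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True)   # greedy pick
        input_ids = torch.cat([input_ids, next_token], dim=-1)       # append and continue
        if next_token.item() == tokenizer.eos_token_id:
            break

print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
```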
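Finally, the `__main__` hunk registers the custom `dolphin` model type before calling `from_pretrained`; that registration is what lets the `Auto*` classes resolve the checkpoint to `DolphinForCausalLM`. Below is a self-contained sketch of the same `transformers` registration mechanism, with hypothetical toy classes standing in for the real `DolphinConfig`/`DolphinForCausalLM`:

```python
import torch
from transformers import (AutoConfig, AutoModelForCausalLM,
                          PretrainedConfig, PreTrainedModel)

# Hypothetical stand-ins for DolphinConfig / DolphinForCausalLM.
class ToyConfig(PretrainedConfig):
    model_type = "toy-dolphin"  # the string the Auto* classes dispatch on

class ToyForCausalLM(PreTrainedModel):
    config_class = ToyConfig

    def __init__(self, config):
        super().__init__(config)
        self.lm_head = torch.nn.Linear(4, 4)

    def forward(self, input_ids=None, **kwargs):
        return self.lm_head(torch.zeros(1, 4))

# Same registration pattern as the README's __main__ block.
AutoConfig.register("toy-dolphin", ToyConfig)
AutoModelForCausalLM.register(ToyConfig, ToyForCausalLM)

# The Auto* classes now resolve this model_type to the registered classes.
model = AutoModelForCausalLM.from_config(ToyConfig())
print(type(model).__name__)  # -> ToyForCausalLM
```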