---
license: mit
language:
- en
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
---

This is the plain vanilla meta-llama/Meta-Llama-3.1-8B-Instruct model exported to ONNX. I am researching the offline use of various models and thought it might come in handy for the community.

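For reference, exports like this can be produced with Hugging Face Optimum. A rough sketch follows; this is not necessarily the exact command used for this repo, and exporting the 8B weights needs a lot of RAM and disk space:

pip install "optimum[exporters]"

optimum-cli export onnx --model meta-llama/Meta-Llama-3.1-8B-Instruct llama3.1_onnx/
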
Example use

#0. Download and unzip the package into a subfolder in your project folder

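#If you prefer a scripted download, the huggingface_hub CLI can pull the files directly (replace <this-repo-id> with this repository's id):

huggingface-cli download <this-repo-id> --local-dir ./llama3.1_onnx
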
#1. Create a new Python environment

python -m venv llama_env

#2. Activate the environment (Windows)

llama_env\Scripts\activate

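#On Linux/macOS, activate with:

source llama_env/bin/activate
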
#3. Install ONNX Runtime

pip install onnx onnxruntime-gpu

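#Optional sanity check: confirm the CUDA provider is available (assumes a CUDA-capable GPU with matching drivers; otherwise ONNX Runtime falls back to CPU):

python -c "import onnxruntime as ort; print(ort.get_available_providers())"
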
#4. Install transformers and PyTorch

pip install transformers

pip install torch

#(the PyPI package is named torch, so there is no separate "pytorch" package to install)

#I had to run this when I ran into a dependency conflict (it pins NumPy below 2.0)

python -m pip install --upgrade pip

python -m pip install "numpy<2"

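#To confirm the pin took effect:

python -c "import numpy; print(numpy.__version__)"
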
#I use VSCode, so if you'd like:

#Install Jupyter and create a notebook

pip install jupyter

code  #run VS Code

Then, in a notebook cell or script, load the model and inspect its inputs and outputs:

import onnxruntime as ort
import torch
import numpy as np

# Load the ONNX model (adjust the path to wherever you unzipped the package)
onnx_model_path = "pathTo/llama3.1.onnx"

# Recent onnxruntime-gpu builds require the providers to be set explicitly; CPUExecutionProvider is the fallback
session = ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])

# Check the model's input and output names and shapes
for input_meta in session.get_inputs():
    print(f"Input Name: {input_meta.name}, Shape: {input_meta.shape}, Type: {input_meta.type}")

for output_meta in session.get_outputs():
    print(f"Output Name: {output_meta.name}, Shape: {output_meta.shape}, Type: {output_meta.type}")

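Once you know the input names, a minimal greedy-decoding sketch looks roughly like the following. It assumes the export takes input_ids and attention_mask and returns logits as its first output (many Llama exports also expect past_key_values.* inputs; check the names printed above and adapt if so), and it reuses the tokenizer from the original gated meta-llama repo:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")

prompt = "What is ONNX Runtime?"
inputs = tokenizer(prompt, return_tensors="np")
input_ids = inputs["input_ids"].astype(np.int64)
attention_mask = inputs["attention_mask"].astype(np.int64)

# Greedy decoding, re-running the full sequence each step (simple but slow; no KV cache)
for _ in range(32):
    outputs = session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
    logits = outputs[0]  # assumed shape: (batch, seq_len, vocab_size)
    next_token = int(np.argmax(logits[0, -1]))
    if next_token == tokenizer.eos_token_id:
        break
    input_ids = np.concatenate([input_ids, [[next_token]]], axis=1)
    attention_mask = np.concatenate([attention_mask, [[1]]], axis=1)

print(tokenizer.decode(input_ids[0], skip_special_tokens=True))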