import os
import pytest

from tests.utils import wrap_test_forked
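
# These tests are opt-in via environment variables (BENCHMARK, FALCONS, LLAMA) and are
# skipped in normal runs; e.g. a manual invocation might look like:
#   FALCONS=1 pytest -s -v -k test_get_falcons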


@pytest.mark.skipif(not os.getenv('BENCHMARK'),
                    reason="Only valid on sufficiently large system and not normal part of testing."
                           "  Instead used to get eval scores for all models.")
@pytest.mark.parametrize(
    "base_model",
    [
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-oig-oasst1-512-6_9b",
        "h2oai/h2ogpt-oig-oasst1-512-12b",
        "h2oai/h2ogpt-oig-oasst1-512-20b",
        "h2oai/h2ogpt-oasst1-512-12b",
        "h2oai/h2ogpt-oasst1-512-20b",
        "h2oai/h2ogpt-gm-oasst1-en-1024-20b",
        "databricks/dolly-v2-12b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
        "ehartford/WizardLM-7B-Uncensored",
        "ehartford/WizardLM-13B-Uncensored",
        "AlekseyKorshuk/vicuna-7b",
        "TheBloke/stable-vicuna-13B-HF",
        "decapoda-research/llama-7b-hf",
        "decapoda-research/llama-13b-hf",
        "decapoda-research/llama-30b-hf",
        "junelee/wizard-vicuna-13b",
        "openaccess-ai-collective/wizard-mega-13b",
    ]
)
@wrap_test_forked
def test_score_eval(base_model):
    from generate import main
    main(
        base_model=base_model,
        chat=False,
        stream_output=False,
        gradio=False,
        eval_prompts_only_num=500,
        eval_as_output=False,
        num_beams=2,
        infer_devices=False,
    )
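    # Rough CLI equivalent of the call above (a sketch, assuming generate.py exposes
    # main() via fire so keyword arguments map to --flags):
    #   python generate.py --base_model=<model> --chat=False --stream_output=False \
    #       --gradio=False --eval_prompts_only_num=500 --eval_as_output=False \
    #       --num_beams=2 --infer_devices=False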


@pytest.mark.skipif(not os.getenv('FALCONS'),
                    reason="Only used to download/cache Falcon models, not a normal part of testing.")
@pytest.mark.parametrize(
    "base_model",
    [
        "OpenAssistant/falcon-7b-sft-top1-696",
        "OpenAssistant/falcon-7b-sft-mix-2000",
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2",
        "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b",
        "OpenAssistant/falcon-40b-sft-top1-560",
        "OpenAssistant/falcon-40b-sft-mix-1226",
    ]
)
@wrap_test_forked
def test_get_falcons(base_model):
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    t = AutoTokenizer.from_pretrained(base_model,
                                      use_fast=False,
                                      padding_side="left",
                                      trust_remote_code=True,
                                      use_auth_token=True,
                                      )
    assert t is not None
    m = AutoModelForCausalLM.from_pretrained(base_model,
                                             trust_remote_code=True,
                                             torch_dtype=torch.float16,
                                             use_auth_token=True,
                                             )
    assert m is not None
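    # Optional smoke check once both pieces are cached (a sketch, not part of the
    # original download-only test; prompt and max_new_tokens are arbitrary):
    # inputs = t("Hello, my name is", return_tensors="pt").to(m.device)
    # out = m.generate(**inputs, max_new_tokens=8)
    # assert t.decode(out[0], skip_special_tokens=True)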


@pytest.mark.skipif(not os.getenv('LLAMA'),
                    reason="Only used for LLaMA landmark-attention conversion, not a normal part of testing.")
@wrap_test_forked
def test_get_landmark_llama():
    import torch
    from transformers import LlamaForCausalLM, LlamaTokenizer
    m = LlamaForCausalLM.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    t = LlamaTokenizer.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    assert m is not None and t is not None

    os.system("""
#
# step 1, convert llama to HF format
pip install protobuf==3.19.0
source ~/.bashrc.mamba
mamba create -n trans
conda activate trans
conda install python=3.10 -y

git clone https://github.com/epfml/landmark-attention.git
pip install fire datasets
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install .
pip install torch accelerate sentencepiece protobuf==3.19.0
# below requires LLaMa weights
python src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir /data/jon/LLaMA --model_size 7B --output_dir llama_7B
#
# step 2, make landmark model (change hash if updated)
mkdir -p epfml/landmark-attention-llama7b-wdiff
cd epfml/landmark-attention-llama7b-wdiff
ln -s ~/.cache/huggingface/hub/models--epfml--landmark-attention-llama7b-wdiff/snapshots/050562871ac72723b4ab674f0392b02cd9609842/* .
cd ../../
python ../landmark-attention/llama/weight_diff.py recover --path_raw llama_7B --path_diff epfml/landmark-attention-llama7b-wdiff --path_tuned landmark_llama_7b
""")