"text": {}

topic_maintainers_map = {
    "text models": ["@ArthurZucker", "@younesbelkada"],
    "vision models": "@amyeroberts",
    "speech models": "@sanchit-gandhi",
    "graph models": "@clefourrier",
    "flax": "@sanchit-gandhi",
    "generate": "@gante",
    "pipelines": "@Narsil",
    "tensorflow": ["@gante", "@Rocketknight1"],
    "tokenizers": "@ArthurZucker",
    "trainer": ["@muellerzr", "@pacman100"],
    "deepspeed": "@pacman100",
    "ray/raytune": ["@richardliaw", "@amogkam"],
    "Big Model Inference": "@SunMarc",
    "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
    "Documentation": ["@stevhliu", "@MKhalusova"],
    "accelerate": "different repo",
    "datasets": "different repo",
    "diffusers": "different repo",
    "rust tokenizers": "different repo",
    "Flax examples": "@sanchit-gandhi",
    "PyTorch vision examples": "@amyeroberts",
    "PyTorch text examples": "@ArthurZucker",
    "PyTorch speech examples": "@sanchit-gandhi",
    "PyTorch generate examples": "@gante",
    "TensorFlow": "@Rocketknight1",
    "Research projects and examples": "not maintained",
}
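
# NOTE: the code below references an `issue` dict that is never defined in the
# original snippet. As a minimal sketch (an assumption, not part of the
# original file), fetch an issue from the GitHub REST API, whose JSON payload
# includes the "body" field used in the prompt further down. The repository
# and issue number here are placeholders.
import requests

ISSUE_URL = "https://api.github.com/repos/huggingface/transformers/issues/1"  # placeholder
issue = requests.get(ISSUE_URL, headers={"Accept": "application/vnd.github+json"}).json()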


from transformers import AutoTokenizer, LlamaForCausalLM

# Load the (gated) Llama 2 7B checkpoint and its tokenizer.
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

# Ask the model which known topic best describes the issue body.
prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate: bound the number of *new* tokens. The original `max_length=30`
# capped the total sequence length, which is shorter than the prompt itself
# and would cut generation off immediately.
generate_ids = model.generate(**inputs, max_new_tokens=30)
output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
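
# Post-processing sketch (an assumption, not part of the original file):
# decode only the newly generated tokens, then scan them for a known topic so
# the matching maintainers can be looked up and pinged.
completion = tokenizer.batch_decode(
    generate_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
)[0]
predicted = next((t for t in topic_maintainers_map if t.lower() in completion.lower()), None)
if predicted is not None:
    print(f"Topic: {predicted} -> maintainers: {topic_maintainers_map[predicted]}")
else:
    print("No known topic found in model output:", completion)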