import json

from transformers import AutoTokenizer, LlamaForCausalLM

with open("issues_dict.json", "r") as f:
    issues = json.load(f)

# Topics a triaged issue can be tagged with, mapped to the maintainer(s) to ping.
topic_maintainers_map = {
    "text models": ["@ArthurZucker", "@younesbelkada"],
    "vision models": "@amyeroberts",
    "speech models": "@sanchit-gandhi",
    "graph models": "@clefourrier",
    "flax": "@sanchit-gandhi",
    "generate": "@gante",
    "pipelines": "@Narsil",
    "tensorflow": ["@gante", "@Rocketknight1"],
    "tokenizers": "@ArthurZucker",
    "trainer": ["@muellerzr", "@pacman100"],
    "deepspeed": "@pacman100",
    "ray/raytune": ["@richardliaw", "@amogkam"],
    "Big Model Inference": "@SunMarc",
    "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
    "Documentation": ["@stevhliu", "@MKhalusova"],
    "accelerate": "different repo",
    "datasets": "different repo",
    "diffusers": "different repo",
    "rust tokenizers": "different repo",
    "Flax examples": "@sanchit-gandhi",
    "PyTorch vision examples": "@amyeroberts",
    "PyTorch text examples": "@ArthurZucker",
    "PyTorch speech examples": "@sanchit-gandhi",
    "PyTorch generate examples": "@gante",
    "TensorFlow": "@Rocketknight1",
    "Research projects and examples": "not maintained",
}

issue_no = 2781
issue = issues[str(issue_no)]

model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
prompt = f"What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())}\nIssue:\n{issue['body']}"

inputs = tokenizer(prompt, return_tensors="pt")
prefix_len = inputs.input_ids.shape[1]

# Generate a completion, then decode only the newly generated tokens.
# Note: prefix_len counts *tokens*, so the prompt must be stripped by slicing
# the token ids, not the decoded string.
generate_ids = model.generate(inputs.input_ids, max_length=30 + prefix_len)
output = tokenizer.batch_decode(
    generate_ids[:, prefix_len:], skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(output)
print("TITLE", f"{issue['number']} {issue['title']}")
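
# The map above is only used inside the prompt; the script never performs the final
# routing step from predicted topics to maintainers. A minimal sketch of that step
# follows. It assumes the model's reply names topics verbatim from the list (a fragile
# assumption, especially for a base model), and `suggest_maintainers` is a hypothetical
# helper, not part of transformers or any other library.
def suggest_maintainers(reply: str, mapping: dict) -> dict:
    """Scan the generated reply for known topic names and collect their maintainers."""
    matches = {}
    for topic, maintainers in mapping.items():
        if topic.lower() in reply.lower():
            matches[topic] = maintainers if isinstance(maintainers, list) else [maintainers]
    return matches

# e.g. a reply mentioning "tokenizers" yields {"tokenizers": ["@ArthurZucker"]}.
# Plain substring matching will also catch overlapping topics ("rust tokenizers"
# implies "tokenizers"), which is acceptable for a rough triage pass.
print(suggest_maintainers(output, topic_maintainers_map))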