DREAM
Collection
DREAM: Dense Retrieval Embeddings via Autoregressive Modeling • 3 items • Updated
How to use yixuantt/DREAM-1B with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM
# Load the base causal LM first; the adapter is attached to it on the next line.
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
model = PeftModel.from_pretrained(base_model, "yixuantt/DREAM-1B")

DREAM (DREAM-1B) is a LoRA adapter for dense retrieval embeddings trained with autoregressive language-model supervision, as presented in the paper "DREAM: Dense Retrieval Embeddings via Autoregressive Modeling".
The official code is available in the DREAM GitHub Repository.
This repository contains the PEFT adapter only. Load it together with the base model: meta-llama/Llama-3.2-1B.
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Hub identifiers: the adapter repository and the base model it is applied to.
model_id = "yixuantt/DREAM-1B"
base_id = "meta-llama/Llama-3.2-1B"

# The tokenizer is fetched under the adapter's repository id.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the base weights in bfloat16 and let device_map="auto" decide placement.
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")

# Attach the adapter and switch to inference mode (eval() returns the module itself).
model = PeftModel.from_pretrained(base, model_id).eval()
@torch.no_grad()
def encode(texts, max_length=512):
    """Embed ``texts`` as L2-normalized last-token hidden states.

    Args:
        texts: Input handed directly to the module-level ``tokenizer``.
        max_length: Truncation limit passed to the tokenizer.

    Returns:
        A float32 tensor holding one L2-normalized vector per sequence in
        the batch, taken from the final hidden layer at the position of the
        last non-padding token.
    """
    batch = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    batch = batch.to(model.device)

    # Only the hidden states are needed, so the KV cache is disabled.
    final_layer = model(
        **batch, output_hidden_states=True, use_cache=False
    ).hidden_states[-1]

    # Locate the last attended position of every row. argmax over the
    # reversed mask yields the index of its first 1, i.e. the number of
    # padding positions trailing the sequence (0 when padding is on the
    # left), so the same formula covers both padding sides.
    mask = batch["attention_mask"]
    trailing_pad = mask.flip(dims=[1]).argmax(dim=1)
    last_token = mask.size(1) - 1 - trailing_pad

    rows = torch.arange(final_layer.size(0), device=final_layer.device)
    pooled = final_layer[rows, last_token]

    # Upcast before normalizing so the returned embeddings are float32.
    return F.normalize(pooled.float(), p=2, dim=-1)
# encode() returns L2-normalized vectors, so each dot product below is a
# cosine similarity between a query and a document.
queries = encode(["What is DREAM?"])
docs = encode(["DREAM trains dense retrievers with autoregressive supervision."])

# Similarity matrix with one row per query and one column per document.
scores = torch.matmul(queries, docs.T)
print(scores)
Base model
meta-llama/Llama-3.2-1B