Spaces:

bala1802
/

FRIENDS-GPT

Sleeping

FRIENDS-GPT / head.py

Upload 7 files

dabde41 7 months ago

No virus

1.3 kB

	import torch
	import torch.nn as nn
	from torch.nn import functional as F

	import gpt_config as config

	class Head(nn.Module):
	    """A single head of masked (causal) self-attention.

	    Each position is projected to a key, a query and a value vector of
	    width ``head_size``. Attention scores are masked with a
	    lower-triangular matrix so a position only draws on itself and
	    earlier positions.

	    The embedding width, the maximum context length and the dropout
	    probability are read from the ``gpt_config`` module
	    (``n_embd``, ``block_size`` and ``dropout``).
	    """

	    def __init__(self, head_size):
	        """Build the three projections, the causal mask and the dropout layer.

	        Args:
	            head_size: Output width of the key, query and value projections.
	        """
	        super().__init__()
	        embed_dim = config.n_embd
	        context_len = config.block_size

	        # Bias-free linear maps from the embedding width to the head width.
	        self.key = nn.Linear(embed_dim, head_size, bias=False)
	        self.query = nn.Linear(embed_dim, head_size, bias=False)
	        self.value = nn.Linear(embed_dim, head_size, bias=False)

	        # Lower-triangular matrix of ones, stored as a buffer (not a
	        # parameter) under the name 'tril'; zeros mark disallowed pairs.
	        lower_ones = torch.tril(torch.ones(context_len, context_len))
	        self.register_buffer('tril', lower_ones)

	        self.dropout = nn.Dropout(config.dropout)

	    def forward(self, x):
	        """Apply masked self-attention to ``x``.

	        Args:
	            x: Tensor of size (batch, time-step, channels).

	        Returns:
	            Tensor of size (batch, time-step, head size).
	        """
	        # The three-way unpack also rejects inputs that are not rank 3.
	        _, steps, _ = x.shape

	        keys = self.key(x)       # (B, T, hs)
	        queries = self.query(x)  # (B, T, hs)

	        # Attention scores ("affinities"), scaled by head_size ** -0.5.
	        scale = keys.shape[-1] ** -0.5
	        scores = torch.matmul(queries, keys.transpose(-2, -1)) * scale  # (B, T, T)

	        # Entries where the sliced triangular mask is zero are set to -inf
	        # so softmax assigns them zero weight.
	        blocked = self.tril[:steps, :steps] == 0
	        scores = scores.masked_fill(blocked, float('-inf'))  # (B, T, T)

	        weights = self.dropout(F.softmax(scores, dim=-1))  # (B, T, T)

	        # Weighted aggregation of the value vectors.
	        values = self.value(x)  # (B, T, hs)
	        return torch.matmul(weights, values)  # (B, T, hs)