Imran1
/

embadding

Inference Endpoints

Model card Files Files and versions Community

embadding / README.md

Imran1's picture

Update README.md

52f0805 verified about 1 month ago

|

No virus

2.34 kB

	---
	license: mit
	---
	# Model usage

	```python
	import torch
	import torch.nn.functional as F
	from torch import nn
	from transformers import AutoConfig, AutoTokenizer, BertModel
	# First, define your custom model class again
	class HFCustomBertModel(nn.Module):
	    """BERT encoder followed by an additional Linear + Tanh pooling head.

	    ``forward`` feeds the encoder's ``pooler_output`` through ``self.pooler``
	    and returns the result, whose last dimension is ``config.hidden_size``.

	    ``BertModel`` (from ``transformers``) must be importable in this module.
	    """

	    def __init__(self, config):
	        """Create the encoder and the pooling head.

	        Args:
	            config: Model configuration. It is passed to ``BertModel`` and
	                must expose ``hidden_size``.
	        """
	        super().__init__()
	        self.bert = BertModel(config)
	        # Extra projection applied on top of the encoder's own pooled output.
	        self.pooler = nn.Sequential(
	            nn.Linear(config.hidden_size, config.hidden_size),
	            nn.Tanh(),
	        )

	    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
	        """Encode the inputs and return the pooled representation.

	        Args:
	            input_ids: Token ids forwarded to the encoder.
	            attention_mask: Optional mask forwarded to the encoder.
	            token_type_ids: Optional segment ids forwarded to the encoder.

	        Returns:
	            The output of ``self.pooler`` applied to the encoder's
	            ``pooler_output``.
	        """
	        outputs = self.bert(
	            input_ids,
	            attention_mask=attention_mask,
	            token_type_ids=token_type_ids,
	        )
	        return self.pooler(outputs.pooler_output)
	def load_custom_model_and_tokenizer(model_path, weights_path=None):
	    """Build an ``HFCustomBertModel`` and its tokenizer for ``model_path``.

	    Args:
	        model_path: Identifier or path given to
	            ``AutoConfig.from_pretrained`` and
	            ``AutoTokenizer.from_pretrained``.
	        weights_path: Optional path to a state-dict file readable by
	            ``torch.load``. When omitted, no checkpoint is loaded and the
	            model keeps the parameters it was constructed with from the
	            config alone.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    # Load the config and instantiate the custom architecture from it.
	    config = AutoConfig.from_pretrained(model_path)
	    model = HFCustomBertModel(config)

	    if weights_path is not None:
	        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
	        state_dict = torch.load(weights_path, map_location="cpu")
	        model.load_state_dict(state_dict)

	    # Load the tokenizer that matches the model.
	    tokenizer = AutoTokenizer.from_pretrained(model_path)

	    return model, tokenizer

	# Usage: embed one query and one document, then print their cosine similarity.
	model_path = "Imran1/embadding"
	# NOTE(review): as called here the model is constructed from the config;
	# confirm that trained weights are loaded before relying on the scores.
	model, tokenizer = load_custom_model_and_tokenizer(model_path)

	queries = ["how much protein should a female eat"]
	documents = ["As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day."]

	model.eval()  # Set the model to evaluation mode

	# Inference only: no gradients are needed.
	with torch.no_grad():
	    # Tokenize and encode the queries and documents
	    query_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
	    document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")

	    # Get embeddings
	    query_embeddings = model(**query_inputs)
	    document_embeddings = model(**document_inputs)

	    # L2-normalize so the dot product below equals cosine similarity
	    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
	    document_embeddings = F.normalize(document_embeddings, p=2, dim=1)

	    # Similarity matrix: rows are queries, columns are documents
	    scores = torch.matmul(query_embeddings, document_embeddings.transpose(0, 1))

	# .item() requires a single-element tensor, i.e. exactly one query and one document.
	print(f"Similarity score: {scores.item():.4f}")
	# Output: Similarity score: 0.9605
	```