---
license: mit
tags:
- code
language:
- en
---

# What does this model do?

This model converts natural-language input into a MongoDB (MQL) query. It is a fine-tuned CodeT5+ 220M.
This model is part of the nl2query repository, which is available at https://github.com/Chirayu-Tripathi/nl2query

You can use this model via the GitHub repository or with the following code. More information can be found in the repository.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load the fine-tuned checkpoint and its tokenizer, then move the model to
# the GPU when one is available.
model = AutoModelForSeq2SeqLM.from_pretrained("Chirayu/nl2mongo")
tokenizer = AutoTokenizer.from_pretrained("Chirayu/nl2mongo")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Example prompt: "mongo: <question> | <collection> : <comma-separated fields>"
textual_query = '''mongo: which cabinet has average age less than 21? | titanic : _id, passengerid, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked'''


def generate_query(
    textual_query: str,
    num_beams: int = 10,
    max_length: int = 128,
    repetition_penalty: float = 2.5,
    length_penalty: float = 1.0,
    early_stopping: bool = True,
    top_p: float = 0.95,
    top_k: int = 50,
    num_return_sequences: int = 1,
) -> str:
    """Generate a MongoDB (MQL) query from a natural-language prompt.

    Uses the module-level ``model`` and ``tokenizer`` loaded above.

    Args:
        textual_query: Prompt text, e.g. formatted like the example above
            (question followed by the collection name and its fields).
        num_beams: Number of beams passed to ``model.generate``.
        max_length: Maximum length of the generated token sequence.
        repetition_penalty: Repetition penalty passed to ``model.generate``.
        length_penalty: Length penalty passed to ``model.generate``.
        early_stopping: Whether beam search may stop early.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.
        top_k: Top-k sampling cutoff passed to ``model.generate``.
        num_return_sequences: Number of sequences requested from
            ``model.generate``; only the first one is returned.

    Returns:
        The first generated sequence, decoded to text with special tokens
        removed.
    """
    input_ids = tokenizer.encode(
        textual_query, return_tensors="pt", add_special_tokens=True
    )
    # Send the inputs to wherever the model actually lives instead of
    # re-deriving the device, so the two can never disagree.
    input_ids = input_ids.to(model.device)

    # NOTE: top_p / top_k are sampling parameters; since `do_sample` is not
    # set here they are presumably ignored by beam search (see the
    # transformers generation docs). They are still forwarded so existing
    # callers keep working unchanged.
    generated_ids = model.generate(
        input_ids=input_ids,
        num_beams=num_beams,
        max_length=max_length,
        repetition_penalty=repetition_penalty,
        length_penalty=length_penalty,
        early_stopping=early_stopping,
        top_p=top_p,
        top_k=top_k,
        num_return_sequences=num_return_sequences,
    )

    # Only the first sequence is returned, so decode just that one.
    return tokenizer.decode(
        generated_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )


print(generate_query(textual_query))
```