Spaces:
Sleeping
Sleeping
File size: 791 Bytes
c44d252 9e8bd13 c44d252 9e8bd13 c44d252 9e8bd13 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import os
import re
from typing import List, Optional

import openai
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
class LLM_Middleware():
    """Bundle OpenAI credential setup with Hugging Face dataset/tokenizer helpers."""

    # Hugging Face credential handed to the constructor. It is only stored;
    # no method in this class reads it back.
    hf_key: str
    # Result of the most recent ``loadDataset`` call; ``None`` until one is made.
    # (A bare ``dataset`` expression here would raise NameError at class creation.)
    dataset: Optional[object] = None

    def __init__(self, openai_key, hf) -> None:
        """Register the OpenAI key on the ``openai`` module and keep the HF key.

        Args:
            openai_key: Credential assigned to ``openai.api_key``.
            hf: Value stored on the instance as ``hf_key``.
        """
        # ``api_key`` is the module attribute the openai library consults;
        # assigning to ``openai.key`` would silently have no effect.
        openai.api_key = openai_key
        self.hf_key = hf

    def loadDataset(self, datasetName: str):
        """Load a dataset with ``datasets.load_dataset`` and remember it.

        Args:
            datasetName: Identifier forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset``; the same object is also
            stored on ``self.dataset``.
        """
        self.dataset = load_dataset(datasetName)
        return self.dataset

    @staticmethod
    def TokenizerFunction(modelName: str, dataset):
        """Tokenize the ``"text"`` entry of ``dataset`` with a pretrained tokenizer.

        Declared static because it uses no instance state; it can be called on
        the class or on an instance.

        Args:
            modelName: Name forwarded to ``AutoTokenizer.from_pretrained``.
            dataset: Any object supporting ``dataset["text"]``.

        Returns:
            The tokenizer output, padded to ``max_length`` with truncation on.
        """
        tokenizer = AutoTokenizer.from_pretrained(modelName)
        # NOTE(review): the original comment hinted that JSON-shaped data may
        # need a more specific tokenization helper than this one — confirm.
        return tokenizer(dataset["text"], padding="max_length", truncation=True)
|