# aie3-autograder / promptsplitembed.py
from typing import List, Tuple
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

def create_prompt(prompt: str) -> ChatPromptTemplate:
    """Build a chat prompt template from a prompt string."""
    prompt_template = ChatPromptTemplate.from_template(prompt)
    return prompt_template

def split_documents(documents: List[Document]) -> Tuple[List[Document], int]:
    """Split documents into overlapping chunks and return them with an approximate token count."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    split_docs = text_splitter.split_documents(documents)
    total_tokens = sum(len(doc.page_content) for doc in split_docs)  # Character count used as a rough token proxy
    return split_docs, total_tokens

def generate_embeddings(docs: List[Document]) -> Tuple[List[List[float]], int]:
    """Embed each chunk with OpenAI embeddings and return the vectors with an approximate token count."""
    embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
    embeddings = embeddings_model.embed_documents([doc.page_content for doc in docs])
    total_tokens = sum(len(doc.page_content) for doc in docs)  # Character count used as a rough token proxy
    return embeddings, total_tokens

def create_qamodel(model: str = "gpt-4o-mini", temperature: float = 0) -> ChatOpenAI:
    """Create the chat model used for question answering."""
    qamodel = ChatOpenAI(
        model=model,
        temperature=temperature,
    )
    return qamodel
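

# A minimal usage sketch showing how these helpers could be chained together.
# The sample document and prompt text below are hypothetical, not part of the
# original module, and running this requires OPENAI_API_KEY to be set.
if __name__ == "__main__":
    docs = [Document(page_content="Example submission text to be graded.")]
    chunks, split_tokens = split_documents(docs)
    vectors, embed_tokens = generate_embeddings(chunks)
    prompt = create_prompt(
        "Answer the question using the context.\n\nContext: {context}\n\nQuestion: {question}"
    )
    qa_model = create_qamodel()
    response = qa_model.invoke(
        prompt.format_messages(
            context=chunks[0].page_content,
            question="What does the submission discuss?",
        )
    )
    print(f"Split into {len(chunks)} chunks (~{split_tokens} chars), embedded {len(vectors)} chunks.")
    print(response.content)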