File size: 1,297 Bytes
c97d8e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from typing import List, Dict, Tuple
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

def create_prompt(prompt):
    """Build a chat prompt template from a template string.

    Args:
        prompt: Template text handed to ``ChatPromptTemplate.from_template``.

    Returns:
        The object returned by ``ChatPromptTemplate.from_template``.
    """
    return ChatPromptTemplate.from_template(prompt)

def split_documents(
    documents: List[Document],
    *,
    chunk_size: int = 500,
    chunk_overlap: int = 100,
) -> Tuple[List[Document], int]:
    """Split documents into overlapping chunks and report their total size.

    Args:
        documents: Documents to be split.
        chunk_size: Target chunk size passed to the splitter; lengths are
            measured with ``len``, i.e. in characters.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter, also measured in characters.

    Returns:
        A ``(chunks, total_tokens)`` tuple. ``total_tokens`` is the summed
        character length of every chunk's ``page_content``; it is only a
        rough stand-in for a token count and is not produced by a tokenizer.
        Overlapping text is counted once per chunk it appears in.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    split_docs = text_splitter.split_documents(documents)
    # Character count used as an approximation of the token count.
    total_tokens = sum(len(doc.page_content) for doc in split_docs)
    return split_docs, total_tokens

def generate_embeddings(
    docs: List[Document],
    model: str = "text-embedding-3-small",
) -> Tuple[List[List[float]], int]:
    """Embed the text of each document with an OpenAI embedding model.

    Args:
        docs: Documents whose ``page_content`` is embedded.
        model: Name of the embedding model passed to ``OpenAIEmbeddings``.

    Returns:
        An ``(embeddings, total_tokens)`` tuple. ``embeddings`` is whatever
        ``embed_documents`` returns for the documents' texts, and
        ``total_tokens`` is the summed character length of those texts; it is
        only a rough stand-in for a token count, not a tokenizer result.
    """
    embeddings_model = OpenAIEmbeddings(model=model)
    # Collect the texts once; they feed both the embedding call and the count.
    texts = [doc.page_content for doc in docs]
    embeddings = embeddings_model.embed_documents(texts)
    # Character count used as an approximation of the token count.
    total_tokens = sum(len(text) for text in texts)
    return embeddings, total_tokens

def create_qamodel(model: str = "gpt-4o-mini", temperature: float = 0):
    """Create the chat model used for question answering.

    Args:
        model: Model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.

    Returns:
        A ``ChatOpenAI`` instance configured with the given arguments.
    """
    # Forward the caller's arguments; previously the defaults were hard-coded
    # in the call, so any values passed in were silently ignored.
    return ChatOpenAI(model=model, temperature=temperature)