Spaces:
Paused
Paused
File size: 4,072 Bytes
4c95dc7 31f9732 4c95dc7 d523035 31f9732 d7ef377 4c95dc7 d7ef377 4c95dc7 8187b01 4c95dc7 b2f993e 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 d523035 4c95dc7 8187b01 b2f993e 31f9732 4c95dc7 b2f993e 4c95dc7 8187b01 b2f993e d523035 31f9732 b2f993e 31f9732 8187b01 b2f993e 8187b01 31f9732 8187b01 31f9732 4c95dc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.tracers import LangChainTracer
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore, Qdrant
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from qdrant_client import QdrantClient
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_cohere import CohereRerank
from langchain_core.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
import constants
import os
# Copy the LangChain tracing settings from the local `constants` module into
# the process environment. Environment values must be strings, which is why
# LANGCHAIN_TRACING_V2 is passed through str() (presumably it is stored as a
# bool in `constants` -- confirm).
os.environ.update(
    {
        "LANGCHAIN_API_KEY": constants.LANGCHAIN_API_KEY,
        "LANGCHAIN_TRACING_V2": str(constants.LANGCHAIN_TRACING_V2),
        "LANGCHAIN_ENDPOINT": constants.LANGCHAIN_ENDPOINT,
    }
)

# Register an in-memory cache as the global LLM cache.
set_llm_cache(InMemoryCache())

# The tracer is created only after the environment variables above are set
# (presumably it reads them on construction -- keep this ordering).
tracer = LangChainTracer()

# Shared callback manager handed to every chat model in this module.
callback_manager = CallbackManager([tracer])
########################
### Chat Models ###
########################
#opus3 = ChatAnthropic(
# api_key=constants.ANTRHOPIC_API_KEY,
# temperature=0,
# model='claude-3-opus-20240229',
# callbacks=callback_manager
#)
#
#sonnet35 = ChatAnthropic(
# api_key=constants.ANTRHOPIC_API_KEY,
# temperature=0,
# model='claude-3-5-sonnet-20240620',
# max_tokens=4096,
# callbacks=callback_manager
#)
def _build_openai_chat(model_name):
    """Return a ChatOpenAI client for *model_name* with this module's shared settings.

    Every OpenAI chat model defined here uses the same configuration:
    temperature 0, ``max_tokens`` and ``timeout`` left as ``None``, up to two
    retries, the API key from ``constants.OPENAI_API_KEY`` and the module-level
    ``callback_manager``.
    """
    return ChatOpenAI(
        model=model_name,
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=constants.OPENAI_API_KEY,
        callbacks=callback_manager,
    )


# OpenAI chat models exposed by this module; they differ only in model name.
gpt4 = _build_openai_chat("gpt-4")
gpt4o = _build_openai_chat("gpt-4o")
gpt4o_mini = _build_openai_chat("gpt-4o-mini")
########################
### Embedding Models ###
########################
#basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
# Hugging Face embedding model, referenced by its hub name. It is used by both
# the semantic chunker and the Qdrant vector store defined later in this file.
# NOTE(review): judging by the name this is a fine-tuned Snowflake Arctic
# variant -- confirm against the model card.
tuned_embeddings = HuggingFaceEmbeddings(
    model_name="CoExperiences/snowflake-l-marketing-tuned",
)
#te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
#######################
### Text Splitters ###
#######################
#semanticChunker = SemanticChunker(
# te3_small,
# breakpoint_threshold_type="percentile"
#)
# Semantic text splitter driven by the tuned embedding model. Breakpoints use
# the "percentile" threshold type with a threshold amount of 85.
semanticChunker_tuned = SemanticChunker(
    embeddings=tuned_embeddings,
    breakpoint_threshold_amount=85,
    breakpoint_threshold_type="percentile",
)
#RCTS = RecursiveCharacterTextSplitter(
# # Set a really small chunk size, just to show.
# chunk_size=500,
# chunk_overlap=25,
# length_function=len,
#)
#######################
### Vector Stores ###
#######################
# Single Qdrant client for this module; the endpoint URL and API key both come
# from the local `constants` module.
qdrant_client = QdrantClient(
    api_key=constants.QDRANT_API_KEY,
    url=constants.QDRANT_ENDPOINT,
)
#semantic_Qdrant_vs = QdrantVectorStore(
# client=qdrant_client,
# collection_name="docs_from_ripped_urls",
# embedding=te3_small
#)
#
#rcts_Qdrant_vs = QdrantVectorStore(
# client=qdrant_client,
# collection_name="docs_from_ripped_urls_recursive",
# embedding=te3_small
#)
# Vector store over the "docs_from_ripped_urls_semantic_tuned" collection.
# It is given `tuned_embeddings` as its embedding model.
# NOTE(review): the collection is presumably already populated with vectors
# from the same embedding model -- confirm before switching models.
semantic_tuned_Qdrant_vs = QdrantVectorStore(
    embedding=tuned_embeddings,
    collection_name="docs_from_ripped_urls_semantic_tuned",
    client=qdrant_client,
)
#######################
### Retrievers ###
#######################
# Base retriever over the tuned semantic vector store, asking for k=10
# documents per query.
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(
    search_kwargs={"k": 10},
)

# Cohere reranker used as the document compressor.
# NOTE(review): no API key and no `top_n` are passed here, so the key is
# presumably read from the environment and the library default for `top_n`
# applies -- confirm both.
compressor = CohereRerank(model="rerank-english-v3.0")

# Retriever that runs the base retriever's results through the compressor.
# (A stray trailing "|" after the closing parenthesis was removed; it left a
# dangling operator and made this statement a syntax error.)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=semantic_tuned_retriever,
)