Spaces:
Paused
Paused
angry-meow
committed on
Commit
·
8187b01
1
Parent(s):
31f9732
tidy up models file
Browse files
models.py
CHANGED
@@ -20,7 +20,9 @@ os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT
|
|
20 |
tracer = LangChainTracer()
|
21 |
callback_manager = CallbackManager([tracer])
|
22 |
|
23 |
-
|
|
|
|
|
24 |
|
25 |
opus3 = ChatAnthropic(
|
26 |
api_key=constants.ANTRHOPIC_API_KEY,
|
@@ -67,12 +69,20 @@ gpt4o_mini = ChatOpenAI(
|
|
67 |
callbacks=callback_manager
|
68 |
)
|
69 |
|
|
|
|
|
|
|
|
|
70 |
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
71 |
|
72 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
73 |
|
74 |
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
75 |
|
|
|
|
|
|
|
|
|
76 |
semanticChunker = SemanticChunker(
|
77 |
te3_small,
|
78 |
breakpoint_threshold_type="percentile"
|
@@ -91,14 +101,35 @@ RCTS = RecursiveCharacterTextSplitter(
|
|
91 |
length_function=len,
|
92 |
)
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
95 |
client=qdrant_client,
|
96 |
collection_name="docs_from_ripped_urls_semantic_tuned",
|
97 |
embedding=tuned_embeddings
|
98 |
)
|
|
|
|
|
|
|
|
|
99 |
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k" : 10})
|
100 |
|
101 |
-
#compression
|
102 |
compressor = CohereRerank(model="rerank-english-v3.0")
|
103 |
compression_retriever = ContextualCompressionRetriever(
|
104 |
base_compressor=compressor, base_retriever=semantic_tuned_retriever
|
|
|
20 |
tracer = LangChainTracer()
|
21 |
callback_manager = CallbackManager([tracer])
|
22 |
|
23 |
+
########################
|
24 |
+
### Chat Models ###
|
25 |
+
########################
|
26 |
|
27 |
opus3 = ChatAnthropic(
|
28 |
api_key=constants.ANTRHOPIC_API_KEY,
|
|
|
69 |
callbacks=callback_manager
|
70 |
)
|
71 |
|
72 |
+
########################
|
73 |
+
### Embedding Models ###
|
74 |
+
########################
|
75 |
+
|
76 |
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
77 |
|
78 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
79 |
|
80 |
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
81 |
|
82 |
+
#######################
|
83 |
+
### Text Splitters ###
|
84 |
+
#######################
|
85 |
+
|
86 |
semanticChunker = SemanticChunker(
|
87 |
te3_small,
|
88 |
breakpoint_threshold_type="percentile"
|
|
|
101 |
length_function=len,
|
102 |
)
|
103 |
|
104 |
+
#######################
|
105 |
+
### Vector Stores ###
|
106 |
+
#######################
|
107 |
+
|
108 |
+
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
109 |
+
|
110 |
+
semantic_Qdrant_vs = QdrantVectorStore(
|
111 |
+
client=qdrant_client,
|
112 |
+
collection_name="docs_from_ripped_urls",
|
113 |
+
embedding=te3_small
|
114 |
+
)
|
115 |
+
|
116 |
+
rcts_Qdrant_vs = QdrantVectorStore(
|
117 |
+
client=qdrant_client,
|
118 |
+
collection_name="docs_from_ripped_urls_recursive",
|
119 |
+
embedding=te3_small
|
120 |
+
)
|
121 |
+
|
122 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
123 |
client=qdrant_client,
|
124 |
collection_name="docs_from_ripped_urls_semantic_tuned",
|
125 |
embedding=tuned_embeddings
|
126 |
)
|
127 |
+
|
128 |
+
#######################
|
129 |
+
### Retrievers ###
|
130 |
+
#######################
|
131 |
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k" : 10})
|
132 |
|
|
|
133 |
compressor = CohereRerank(model="rerank-english-v3.0")
|
134 |
compression_retriever = ContextualCompressionRetriever(
|
135 |
base_compressor=compressor, base_retriever=semantic_tuned_retriever
|