yrobel-lima committed on
Commit
561f679
1 Parent(s): 7a29175

Update rag/helpers.py

Browse files
Files changed (1) hide show
  1. rag/helpers.py +57 -57
rag/helpers.py CHANGED
@@ -1,57 +1,57 @@
1
- import logging
2
- import os
3
- import random
4
- from datetime import datetime
5
- from functools import lru_cache
6
- from typing import Sequence
7
- from zoneinfo import ZoneInfo
8
-
9
- import langsmith
10
- from langchain_core.documents import Document
11
- from langchain_community.document_transformers import LongContextReorder
12
- from langchain.retrievers.document_compressors import FlashrankRerank
13
-
14
- logging.basicConfig(level=logging.ERROR)
15
-
16
-
17
class DocumentFormatter:
    """Render documents as one numbered, separator-delimited string.

    Every document becomes an entry that starts with ``- <prefix> <n>:``
    (numbering begins at 1), followed by a blank line, a tab, and the
    document's ``page_content``. Entries are joined by a ``---`` line.
    """

    def __init__(self, prefix: str):
        """Remember the label printed before each document's number."""
        self.prefix = prefix

    def __call__(self, docs: list[Document]) -> str:
        """Return the formatted text for *docs*; an empty list gives ``""``."""
        separator = "\n---\n"
        return separator.join(
            f"- {self.prefix} {position}:\n\n\t" + doc.page_content
            for position, doc in enumerate(docs, start=1)
        )
28
-
29
-
30
def get_datetime() -> str:
    """Return the current America/Vancouver time as formatted text.

    The layout is ``%A, %Y-%b-%d %H:%M:%S``, for example
    ``Monday, 2024-Jan-01 13:45:00``.
    """
    vancouver_tz = ZoneInfo("America/Vancouver")
    current_time = datetime.now(vancouver_tz)
    return current_time.strftime("%A, %Y-%b-%d %H:%M:%S")
32
-
33
-
34
def reorder_documents(docs: list[Document]) -> Sequence[Document]:
    """Pass *docs* through a fresh ``LongContextReorder`` and return its output."""
    reorderer = LongContextReorder()
    return reorderer.transform_documents(docs)
36
-
37
-
38
def randomize_documents(documents: list[Document]) -> list[Document]:
    """Shuffle *documents* into a random order and return the same list.

    The shuffle happens in place, so the caller's list is reordered as well;
    the return value is that very object, handed back for convenience.
    """
    random.shuffle(documents)  # in place: mutates the argument
    return documents
41
-
42
-
43
def create_langsmith_client(
    project: str = "admin-ai-assistant",
) -> "langsmith.Client":
    """Configure LangSmith tracing via environment variables and return a client.

    Args:
        project: Value written to ``LANGCHAIN_PROJECT``. Defaults to the
            project name this function previously hard-coded.

    Returns:
        A ``langsmith.Client`` constructed with no arguments.

    Raises:
        EnvironmentError: If ``LANGCHAIN_API_KEY`` is unset or empty.
    """
    # Check the key before touching the environment so a failed call does not
    # leave tracing switched on without credentials.
    if not os.getenv("LANGCHAIN_API_KEY"):
        raise EnvironmentError("Missing environment variable: LANGCHAIN_API_KEY")

    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_PROJECT": project,
            "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        }
    )
    return langsmith.Client()
51
-
52
-
53
@lru_cache(maxsize=1)
def get_reranker(
    top_n: int = 3, model: str = "ms-marco-MiniLM-L-12-v2"
) -> FlashrankRerank:
    """Return a cached ``FlashrankRerank`` built from the given settings.

    ``lru_cache(maxsize=1)`` keeps only the most recent argument combination,
    so repeated calls with the same arguments reuse one reranker instance.
    """
    reranker = FlashrankRerank(top_n=top_n, model=model)
    return reranker
 
1
+ import logging
2
+ import os
3
+ import random
4
+ from datetime import datetime
5
+ from functools import lru_cache
6
+ from typing import Sequence
7
+ from zoneinfo import ZoneInfo
8
+
9
+ import langsmith
10
+ from langchain_core.documents import Document
11
+ from langchain_community.document_transformers import LongContextReorder
12
+ from langchain.retrievers.document_compressors import FlashrankRerank
13
+
14
+ logging.basicConfig(level=logging.ERROR)
15
+
16
+
17
class DocumentFormatter:
    """Render documents as one numbered, separator-delimited string.

    Every document becomes an entry that starts with ``- <prefix> <n>:``
    (numbering begins at 1), followed by a blank line, a tab, and the
    document's ``page_content``. Entries are joined by a ``---`` line.
    """

    def __init__(self, prefix: str):
        """Remember the label printed before each document's number."""
        self.prefix = prefix

    def __call__(self, docs: list[Document]) -> str:
        """Return the formatted text for *docs*; an empty list gives ``""``."""
        separator = "\n---\n"
        return separator.join(
            f"- {self.prefix} {position}:\n\n\t" + doc.page_content
            for position, doc in enumerate(docs, start=1)
        )
28
+
29
+
30
def get_datetime() -> str:
    """Return the current America/Vancouver time as formatted text.

    The layout is ``%A, %Y-%b-%d %H:%M:%S``, for example
    ``Monday, 2024-Jan-01 13:45:00``.
    """
    vancouver_tz = ZoneInfo("America/Vancouver")
    current_time = datetime.now(vancouver_tz)
    return current_time.strftime("%A, %Y-%b-%d %H:%M:%S")
32
+
33
+
34
def reorder_documents(docs: list[Document]) -> Sequence[Document]:
    """Pass *docs* through a fresh ``LongContextReorder`` and return its output."""
    reorderer = LongContextReorder()
    return reorderer.transform_documents(docs)
36
+
37
+
38
def randomize_documents(documents: list[Document]) -> list[Document]:
    """Shuffle *documents* into a random order and return the same list.

    The shuffle happens in place, so the caller's list is reordered as well;
    the return value is that very object, handed back for convenience.
    """
    random.shuffle(documents)  # in place: mutates the argument
    return documents
41
+
42
+
43
def create_langsmith_client(
    project: str = "talltree-ai-assistant",
) -> "langsmith.Client":
    """Configure LangSmith tracing via environment variables and return a client.

    Args:
        project: Value written to ``LANGCHAIN_PROJECT``. Defaults to the
            project name this function previously hard-coded.

    Returns:
        A ``langsmith.Client`` constructed with no arguments.

    Raises:
        EnvironmentError: If ``LANGCHAIN_API_KEY`` is unset or empty.
    """
    # Check the key before touching the environment so a failed call does not
    # leave tracing switched on without credentials.
    if not os.getenv("LANGCHAIN_API_KEY"):
        raise EnvironmentError("Missing environment variable: LANGCHAIN_API_KEY")

    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_PROJECT": project,
            "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        }
    )
    return langsmith.Client()
51
+
52
+
53
@lru_cache(maxsize=1)
def get_reranker(
    top_n: int = 3, model: str = "ms-marco-MiniLM-L-12-v2"
) -> FlashrankRerank:
    """Return a cached ``FlashrankRerank`` built from the given settings.

    ``lru_cache(maxsize=1)`` keeps only the most recent argument combination,
    so repeated calls with the same arguments reuse one reranker instance.
    """
    reranker = FlashrankRerank(top_n=top_n, model=model)
    return reranker