eibeel committed on
Commit
00c003d
1 Parent(s): 780cc24

Update GPT_RAG.py

Browse files
Files changed (1) hide show
  1. GPT_RAG.py +5 -34
GPT_RAG.py CHANGED
@@ -1,12 +1,4 @@
1
- # -*- coding: utf-8 -*-
2
- """nomic_embedding_rag.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1vAQoZx_07yU0nVCkFxJQkcVeymgNpzFF
8
  """
9
-
10
  !pip install nomic
11
  !pip install --upgrade langchain
12
 
@@ -15,19 +7,9 @@ Original file is located at
15
  ! nomic login <REDACTED-NOMIC-API-KEY>  # SECURITY: a real API token was committed here — revoke/rotate it
16
 
17
  ! pip install -U langchain-nomic langchain_community tiktoken langchain-openai chromadb langchain
18
-
19
- # Optional: LangSmith API keys
20
- import os
21
-
22
- os.environ["LANGCHAIN_TRACING_V2"] = "true"
23
- os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
24
- os.environ["LANGCHAIN_API_KEY"] = "api_key"
25
-
26
- """## Document Loading
27
-
28
- Let's test 3 interesting blog posts.
29
  """
30
 
 
31
  import json
32
  from langchain_community.document_loaders import JSONLoader
33
  from langchain.docstore.document import Document
@@ -64,18 +46,8 @@ for conversation in data:
64
  for doc in docs_list:
65
  print(doc.page_content, doc.metadata)
66
 
67
- """from langchain_community.document_loaders import WebBaseLoader
68
-
69
- urls = [
70
- "https://lilianweng.github.io/posts/2023-06-23-agent/",
71
- "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
72
- "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
73
- ]"""
74
-
75
- """docs = [WebBaseLoader(url).load() for url in urls]""
76
-
77
- """docs_list = [item for sublist in docs for item in sublist]
78
 
 
79
  ## Splitting
80
 
81
  Long context retrieval,
@@ -94,6 +66,7 @@ doc_splits = text_splitter.split_documents(docs_list)
94
  for split in doc_splits:
95
  print(split.page_content, split.metadata)
96
 
 
97
  import tiktoken
98
 
99
  encoding = tiktoken.get_encoding("cl100k_base")
@@ -122,10 +95,8 @@ vectorstore = Chroma.from_documents(
122
  )
123
  retriever = vectorstore.as_retriever()
124
 
125
- """## RAG Chain
126
 
127
- We can use the
128
- """
129
 
130
  import os
131
  from sklearn.metrics import precision_score, recall_score, f1_score
@@ -146,7 +117,7 @@ Question: {question}
146
  prompt = ChatPromptTemplate.from_template(template)
147
 
148
  # LLM API
149
- model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
150
 
151
  # Placeholder para `retriever`
152
  class DummyRetriever:
 
 
 
 
 
 
 
 
1
  """
 
2
  !pip install nomic
3
  !pip install --upgrade langchain
4
 
 
7
  ! nomic login <REDACTED-NOMIC-API-KEY>  # SECURITY: a real API token was committed here — revoke/rotate it
8
 
9
  ! pip install -U langchain-nomic langchain_community tiktoken langchain-openai chromadb langchain
 
 
 
 
 
 
 
 
 
 
 
10
  """
11
 
12
+
13
  import json
14
  from langchain_community.document_loaders import JSONLoader
15
  from langchain.docstore.document import Document
 
46
  for doc in docs_list:
47
  print(doc.page_content, doc.metadata)
48
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ """
51
  ## Splitting
52
 
53
  Long context retrieval,
 
66
  for split in doc_splits:
67
  print(split.page_content, split.metadata)
68
 
69
+
70
  import tiktoken
71
 
72
  encoding = tiktoken.get_encoding("cl100k_base")
 
95
  )
96
  retriever = vectorstore.as_retriever()
97
 
98
+ # RAG Chain
99
 
 
 
100
 
101
  import os
102
  from sklearn.metrics import precision_score, recall_score, f1_score
 
117
  prompt = ChatPromptTemplate.from_template(template)
118
 
119
  # LLM API
120
+ model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
121
 
122
  # Placeholder para `retriever`
123
  class DummyRetriever: