Spaces:
Runtime error
Runtime error
heenakhilwani
committed on
Commit
•
b8a28f6
1
Parent(s):
daca530
Upload folder using huggingface_hub
Browse files
- .Trash-1000/files/Index_update.ipynb +96 -0
- .Trash-1000/files/Pinecone_upsert.ipynb +189 -0
- .Trash-1000/files/Untitled 1.ipynb +6 -0
- .Trash-1000/files/Untitled.ipynb +361 -0
- .Trash-1000/files/Untitled1.ipynb +96 -0
- .Trash-1000/info/Index_update.ipynb.trashinfo +3 -0
- .Trash-1000/info/Pinecone_upsert.ipynb.trashinfo +3 -0
- .Trash-1000/info/Untitled 1.ipynb.trashinfo +3 -0
- .Trash-1000/info/Untitled.ipynb.trashinfo +3 -0
- .Trash-1000/info/Untitled1.ipynb.trashinfo +3 -0
- .ipynb_checkpoints/KTBOT-checkpoint.ipynb +0 -0
- KTBOT.ipynb +1084 -0
- README.md +2 -8
- cacert.pem +0 -0
.Trash-1000/files/Index_update.ipynb
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "6c4c55f1",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"#deletion and creation of index\n",
|
11 |
+
"\n",
|
12 |
+
"import requests\n",
|
13 |
+
"\n",
|
14 |
+
"\n",
|
15 |
+
"api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
16 |
+
"base_url = \"https://api.pinecone.io\"\n",
|
17 |
+
"\n",
|
18 |
+
"# Index name\n",
|
19 |
+
"index_name = \"ktbot\"\n",
|
20 |
+
"\n",
|
21 |
+
"# Construct the delete URL for the index\n",
|
22 |
+
"delete_url = f\"{base_url}/indexes/{index_name}\"\n",
|
23 |
+
"\n",
|
24 |
+
"# Headers with API key\n",
|
25 |
+
"headers = {\n",
|
26 |
+
" \"Content-Type\": \"application/json\",\n",
|
27 |
+
" \"Api-Key\": api_key\n",
|
28 |
+
"}\n",
|
29 |
+
"\n",
|
30 |
+
"# Send the DELETE request to delete the existing index\n",
|
31 |
+
"response_delete = requests.delete(delete_url, headers=headers)\n",
|
32 |
+
"print(response_delete.status_code)\n",
|
33 |
+
"# Check the response status\n",
|
34 |
+
"if response_delete.status_code in (204, 202, 404):\n",
|
35 |
+
" print(f\"Index '{index_name}' deleted successfully.\")\n",
|
36 |
+
"else:\n",
|
37 |
+
" print(f\"Failed to delete index '{index_name}': {response_delete.status_code} - {response_delete.text}\")\n",
|
38 |
+
"\n",
|
39 |
+
"\n",
|
40 |
+
"\n",
|
41 |
+
"\n",
|
42 |
+
"# Construct the URL for creating the index\n",
|
43 |
+
"create_index_url = f\"{base_url}/indexes\"\n",
|
44 |
+
"\n",
|
45 |
+
"\n",
|
46 |
+
"# Define the payload for creating the index\n",
|
47 |
+
"payload = {\n",
|
48 |
+
" \"name\": index_name,\n",
|
49 |
+
" \"dimension\": 1536,\n",
|
50 |
+
" \"metric\": \"cosine\",\n",
|
51 |
+
" \"spec\": {\n",
|
52 |
+
" \"serverless\": {\n",
|
53 |
+
" \"cloud\": \"aws\",\n",
|
54 |
+
" \"region\": \"us-east-1\"\n",
|
55 |
+
" }\n",
|
56 |
+
" }\n",
|
57 |
+
"}\n",
|
58 |
+
"\n",
|
59 |
+
"# Send the POST request to create the index\n",
|
60 |
+
"response = requests.post(create_index_url, headers=headers, json=payload)\n",
|
61 |
+
"\n",
|
62 |
+
"# Check the response status\n",
|
63 |
+
"if response.status_code == 201:\n",
|
64 |
+
" print(f\"Index '{index_name}' created successfully.\")\n",
|
65 |
+
" response_json = json.loads(response.text)\n",
|
66 |
+
" host = response_json[\"host\"]\n",
|
67 |
+
" # Print the host\n",
|
68 |
+
" print(\"Host for index 'ktbot':\", host)\n",
|
69 |
+
"\n",
|
70 |
+
"else:\n",
|
71 |
+
" print(f\"Failed to create index '{index_name}': {response.status_code} - {response.text}\")\n"
|
72 |
+
]
|
73 |
+
}
|
74 |
+
],
|
75 |
+
"metadata": {
|
76 |
+
"kernelspec": {
|
77 |
+
"display_name": "conda_python3",
|
78 |
+
"language": "python",
|
79 |
+
"name": "conda_python3"
|
80 |
+
},
|
81 |
+
"language_info": {
|
82 |
+
"codemirror_mode": {
|
83 |
+
"name": "ipython",
|
84 |
+
"version": 3
|
85 |
+
},
|
86 |
+
"file_extension": ".py",
|
87 |
+
"mimetype": "text/x-python",
|
88 |
+
"name": "python",
|
89 |
+
"nbconvert_exporter": "python",
|
90 |
+
"pygments_lexer": "ipython3",
|
91 |
+
"version": "3.10.14"
|
92 |
+
}
|
93 |
+
},
|
94 |
+
"nbformat": 4,
|
95 |
+
"nbformat_minor": 5
|
96 |
+
}
|
.Trash-1000/files/Pinecone_upsert.ipynb
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"id": "1459dedf",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Number of documents loaded: 3\n",
|
14 |
+
"Number of split documents: 9\n",
|
15 |
+
"Data upserted successfully.\n"
|
16 |
+
]
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"name": "stderr",
|
20 |
+
"output_type": "stream",
|
21 |
+
"text": [
|
22 |
+
"/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/urllib3/connectionpool.py:1061: InsecureRequestWarning: Unverified HTTPS request is being made to host 'ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
|
23 |
+
" warnings.warn(\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
+
"source": [
|
28 |
+
"import os\n",
|
29 |
+
"from docx import Document\n",
|
30 |
+
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
31 |
+
"from openai import AzureOpenAI\n",
|
32 |
+
"import httpx\n",
|
33 |
+
"import requests\n",
|
34 |
+
"import json\n",
|
35 |
+
"import openai\n",
|
36 |
+
"import pinecone\n",
|
37 |
+
"from langchain.document_loaders import DirectoryLoader\n",
|
38 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
39 |
+
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
40 |
+
"from langchain.vectorstores import Pinecone\n",
|
41 |
+
"from langchain.llms import OpenAI\n",
|
42 |
+
"from langchain.chains.question_answering import load_qa_chain\n",
|
43 |
+
"import boto3\n",
|
44 |
+
"# Initialize a session using Amazon S3\n",
|
45 |
+
"s3 = boto3.client('s3')\n",
|
46 |
+
"import io\n",
|
47 |
+
"\n",
|
48 |
+
"def load_docx(file_content):\n",
|
49 |
+
" # Read the DOCX file content and return the text\n",
|
50 |
+
" doc = Document(io.BytesIO(file_content))\n",
|
51 |
+
" full_text = []\n",
|
52 |
+
" for para in doc.paragraphs:\n",
|
53 |
+
" full_text.append(para.text)\n",
|
54 |
+
" return '\\n'.join(full_text)\n",
|
55 |
+
"\n",
|
56 |
+
"def load_docs_from_s3(bucket_name, prefix):\n",
|
57 |
+
" documents = []\n",
|
58 |
+
" try:\n",
|
59 |
+
" # List all objects in the S3 bucket with the specified prefix\n",
|
60 |
+
" response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)\n",
|
61 |
+
" for obj in response.get('Contents', []):\n",
|
62 |
+
" if obj['Key'].endswith('.docx'):\n",
|
63 |
+
" # Read the file content from S3\n",
|
64 |
+
" file_obj = s3.get_object(Bucket=bucket_name, Key=obj['Key'])\n",
|
65 |
+
" file_content = file_obj['Body'].read()\n",
|
66 |
+
" text = load_docx(file_content)\n",
|
67 |
+
" if text:\n",
|
68 |
+
" documents.append((obj['Key'], text)) # Store filename and text together\n",
|
69 |
+
" except Exception as e:\n",
|
70 |
+
" print(f\"Error accessing S3 bucket: {e}\")\n",
|
71 |
+
" return documents\n",
|
72 |
+
"\n",
|
73 |
+
"# Example usage\n",
|
74 |
+
"\n",
|
75 |
+
"\n",
|
76 |
+
"bucket_name = 'emd-forecast'\n",
|
77 |
+
"prefix = 'gtn/input/knowledge_base/' # Replace with your actual prefix\n",
|
78 |
+
"\n",
|
79 |
+
"\n",
|
80 |
+
" \n",
|
81 |
+
"# Split documents into chunks\n",
|
82 |
+
"def split_docs(documents, chunk_size=1000, chunk_overlap=20):\n",
|
83 |
+
" text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
|
84 |
+
" docs = []\n",
|
85 |
+
" for doc_id, doc in documents:\n",
|
86 |
+
" splits = text_splitter.split_text(doc)\n",
|
87 |
+
" docs.extend([(doc_id, split) for split in splits])\n",
|
88 |
+
" return docs\n",
|
89 |
+
"\n",
|
90 |
+
"# Load documents from a directory\n",
|
91 |
+
"documents = load_docs_from_s3(bucket_name, prefix)\n",
|
92 |
+
"print(f\"Number of documents loaded: {len([d[0] for d in documents])}\")\n",
|
93 |
+
"\n",
|
94 |
+
"# Split documents into chunks\n",
|
95 |
+
"docs = split_docs(documents)\n",
|
96 |
+
"print(f\"Number of split documents: {len(docs)}\")\n",
|
97 |
+
"\n",
|
98 |
+
"# Initialize HTTP client\n",
|
99 |
+
"httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
|
100 |
+
"\n",
|
101 |
+
"# Initialize AzureOpenAI client with the HTTP client\n",
|
102 |
+
"client = AzureOpenAI(\n",
|
103 |
+
" azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
|
104 |
+
" api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
|
105 |
+
" api_version=\"2023-09-01-preview\",\n",
|
106 |
+
" http_client=httpx_client\n",
|
107 |
+
")\n",
|
108 |
+
"\n",
|
109 |
+
"# Generate embeddings for all document chunks\n",
|
110 |
+
"embeddings = []\n",
|
111 |
+
"\n",
|
112 |
+
"for doc_id, doc in docs:\n",
|
113 |
+
" text = doc\n",
|
114 |
+
" embedding_response = client.embeddings.create(input=text, model=\"text-embedding-ada-002-v2\")\n",
|
115 |
+
" if embedding_response is not None:\n",
|
116 |
+
" embedding = embedding_response.data[0].embedding\n",
|
117 |
+
" embeddings.append((embedding, doc_id, text))\n",
|
118 |
+
"\n",
|
119 |
+
"api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
120 |
+
"base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
|
121 |
+
"\n",
|
122 |
+
"delete_url = f\"{base_url}/vectors/delete\"\n",
|
123 |
+
"\n",
|
124 |
+
"upsert_endpoint = f\"{base_url}/vectors/upsert\"\n",
|
125 |
+
"\n",
|
126 |
+
"# Headers with API key\n",
|
127 |
+
"headers = {\n",
|
128 |
+
" \"Content-Type\": \"application/json\",\n",
|
129 |
+
" \"Api-Key\": api_key\n",
|
130 |
+
"}\n",
|
131 |
+
"\n",
|
132 |
+
"dimension = 1536\n",
|
133 |
+
"for embed, doc_id, text in embeddings:\n",
|
134 |
+
" assert len(embed) == dimension, \"Embedding dimension mismatch\"\n",
|
135 |
+
"\n",
|
136 |
+
"# Prepare the data to be added to the index\n",
|
137 |
+
"data = {\n",
|
138 |
+
" \"vectors\": [\n",
|
139 |
+
" {\n",
|
140 |
+
" \"id\": f\"{doc_id}-{i}\", # Unique id for each chunk\n",
|
141 |
+
" \"values\": embed, # Convert numpy array to list\n",
|
142 |
+
" \"metadata\": {\"text\": text[:500]} # Store the first 500 characters of the text for preview\n",
|
143 |
+
" }\n",
|
144 |
+
" for i, (embed, doc_id, text) in enumerate(embeddings)\n",
|
145 |
+
" ]\n",
|
146 |
+
"}\n",
|
147 |
+
"\n",
|
148 |
+
"# Make the POST request to upsert data\n",
|
149 |
+
"response = requests.post(upsert_endpoint, headers=headers, json=data, verify=False)\n",
|
150 |
+
"\n",
|
151 |
+
"# Check if the request was successful\n",
|
152 |
+
"if response.status_code == 200:\n",
|
153 |
+
" print(\"Data upserted successfully.\")\n",
|
154 |
+
"else:\n",
|
155 |
+
" print(f\"Failed to upsert data: {response.status_code} - {response.reason}\")\n",
|
156 |
+
" print(\"Response content:\", response.text)\n"
|
157 |
+
]
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"cell_type": "code",
|
161 |
+
"execution_count": null,
|
162 |
+
"id": "0d07c356",
|
163 |
+
"metadata": {},
|
164 |
+
"outputs": [],
|
165 |
+
"source": []
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"metadata": {
|
169 |
+
"kernelspec": {
|
170 |
+
"display_name": "conda_pytorch_p310",
|
171 |
+
"language": "python",
|
172 |
+
"name": "conda_pytorch_p310"
|
173 |
+
},
|
174 |
+
"language_info": {
|
175 |
+
"codemirror_mode": {
|
176 |
+
"name": "ipython",
|
177 |
+
"version": 3
|
178 |
+
},
|
179 |
+
"file_extension": ".py",
|
180 |
+
"mimetype": "text/x-python",
|
181 |
+
"name": "python",
|
182 |
+
"nbconvert_exporter": "python",
|
183 |
+
"pygments_lexer": "ipython3",
|
184 |
+
"version": "3.10.14"
|
185 |
+
}
|
186 |
+
},
|
187 |
+
"nbformat": 4,
|
188 |
+
"nbformat_minor": 5
|
189 |
+
}
|
.Trash-1000/files/Untitled 1.ipynb
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [],
|
3 |
+
"metadata": {},
|
4 |
+
"nbformat": 4,
|
5 |
+
"nbformat_minor": 5
|
6 |
+
}
|
.Trash-1000/files/Untitled.ipynb
ADDED
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "4b645a21",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"My public IP address is: 52.54.134.213\n"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"source": [
|
18 |
+
"from requests import get\n",
|
19 |
+
"ip = get('https://api.ipify.org').content.decode('utf8')\n",
|
20 |
+
"print('My public IP address is: {}'.format(ip))"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 4,
|
26 |
+
"id": "35eed6ac",
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [
|
29 |
+
{
|
30 |
+
"name": "stdout",
|
31 |
+
"output_type": "stream",
|
32 |
+
"text": [
|
33 |
+
"Collecting docx\n",
|
34 |
+
" Downloading docx-0.2.4.tar.gz (54 kB)\n",
|
35 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
36 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
37 |
+
"\u001b[?25hCollecting lxml (from docx)\n",
|
38 |
+
" Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
|
39 |
+
"Requirement already satisfied: Pillow>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from docx) (10.2.0)\n",
|
40 |
+
"Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.0 MB)\n",
|
41 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
42 |
+
"\u001b[?25hBuilding wheels for collected packages: docx\n",
|
43 |
+
" Building wheel for docx (setup.py) ... \u001b[?25ldone\n",
|
44 |
+
"\u001b[?25h Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=f41f957c2ae37f6fa8c9bc84a51bf04272a0b7723ee075b4ad795c4f26527ee1\n",
|
45 |
+
" Stored in directory: /home/ec2-user/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767\n",
|
46 |
+
"Successfully built docx\n",
|
47 |
+
"Installing collected packages: lxml, docx\n",
|
48 |
+
"Successfully installed docx-0.2.4 lxml-5.2.2\n"
|
49 |
+
]
|
50 |
+
}
|
51 |
+
],
|
52 |
+
"source": [
|
53 |
+
"import os\n",
|
54 |
+
"from docx import Document\n",
|
55 |
+
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
56 |
+
"from openai import AzureOpenAI\n",
|
57 |
+
"import httpx\n",
|
58 |
+
"import requests\n",
|
59 |
+
"import json\n",
|
60 |
+
"import openai\n",
|
61 |
+
"import pinecone\n",
|
62 |
+
"from langchain.document_loaders import DirectoryLoader\n",
|
63 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
64 |
+
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
65 |
+
"from langchain.vectorstores import Pinecone\n",
|
66 |
+
"from langchain.llms import OpenAI\n",
|
67 |
+
"from langchain.chains.question_answering import load_qa_chain\n",
|
68 |
+
"# Initialize a session using Amazon S3\n",
|
69 |
+
"s3 = boto3.client('s3')\n",
|
70 |
+
"\n",
|
71 |
+
"def load_docx(file_content):\n",
|
72 |
+
" # Read the DOCX file content and return the text\n",
|
73 |
+
" doc = Document(io.BytesIO(file_content))\n",
|
74 |
+
" full_text = []\n",
|
75 |
+
" for para in doc.paragraphs:\n",
|
76 |
+
" full_text.append(para.text)\n",
|
77 |
+
" return '\\n'.join(full_text)\n",
|
78 |
+
"\n",
|
79 |
+
"def load_docs_from_s3(bucket_name, prefix):\n",
|
80 |
+
" documents = []\n",
|
81 |
+
" try:\n",
|
82 |
+
" # List all objects in the S3 bucket with the specified prefix\n",
|
83 |
+
" response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)\n",
|
84 |
+
" for obj in response.get('Contents', []):\n",
|
85 |
+
" if obj['Key'].endswith('.docx'):\n",
|
86 |
+
" # Read the file content from S3\n",
|
87 |
+
" file_obj = s3.get_object(Bucket=bucket_name, Key=obj['Key'])\n",
|
88 |
+
" file_content = file_obj['Body'].read()\n",
|
89 |
+
" text = load_docx(file_content)\n",
|
90 |
+
" if text:\n",
|
91 |
+
" documents.append((obj['Key'], text)) # Store filename and text together\n",
|
92 |
+
" except Exception as e:\n",
|
93 |
+
" print(f\"Error accessing S3 bucket: {e}\")\n",
|
94 |
+
" return documents\n",
|
95 |
+
"\n",
|
96 |
+
"# Example usage\n",
|
97 |
+
"\n",
|
98 |
+
"\n",
|
99 |
+
"bucket_name = 'emd-forecast'\n",
|
100 |
+
"prefix = 'gtn/input/knowledge_base/' # Replace with your actual prefix\n",
|
101 |
+
"\n",
|
102 |
+
"\n",
|
103 |
+
" \n",
|
104 |
+
"# Split documents into chunks\n",
|
105 |
+
"def split_docs(documents, chunk_size=1000, chunk_overlap=20):\n",
|
106 |
+
" text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
|
107 |
+
" docs = []\n",
|
108 |
+
" for doc_id, doc in documents:\n",
|
109 |
+
" splits = text_splitter.split_text(doc)\n",
|
110 |
+
" docs.extend([(doc_id, split) for split in splits])\n",
|
111 |
+
" return docs\n",
|
112 |
+
"\n",
|
113 |
+
"# Load documents from a directory\n",
|
114 |
+
"documents = load_docs_from_s3(bucket_name, prefix)\n",
|
115 |
+
"print(f\"Number of documents loaded: {len([d[0] for d in documents])}\")\n",
|
116 |
+
"\n",
|
117 |
+
"# Split documents into chunks\n",
|
118 |
+
"docs = split_docs(documents)\n",
|
119 |
+
"print(f\"Number of split documents: {len(docs)}\")\n",
|
120 |
+
"\n",
|
121 |
+
"# Initialize HTTP client\n",
|
122 |
+
"httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
|
123 |
+
"\n",
|
124 |
+
"# Initialize AzureOpenAI client with the HTTP client\n",
|
125 |
+
"client = AzureOpenAI(\n",
|
126 |
+
" azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
|
127 |
+
" api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
|
128 |
+
" api_version=\"2023-09-01-preview\",\n",
|
129 |
+
" http_client=httpx_client\n",
|
130 |
+
")\n",
|
131 |
+
"\n",
|
132 |
+
"# Generate embeddings for all document chunks\n",
|
133 |
+
"embeddings = []\n",
|
134 |
+
"\n",
|
135 |
+
"for doc_id, doc in docs:\n",
|
136 |
+
" text = doc\n",
|
137 |
+
" embedding_response = client.embeddings.create(input=text, model=\"text-embedding-ada-002\")\n",
|
138 |
+
" if embedding_response is not None:\n",
|
139 |
+
" embedding = embedding_response.data[0].embedding\n",
|
140 |
+
" embeddings.append((embedding, doc_id, text))\n",
|
141 |
+
"\n",
|
142 |
+
"api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
143 |
+
"base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
|
144 |
+
"\n",
|
145 |
+
"delete_url = f\"{base_url}/vectors/delete\"\n",
|
146 |
+
"\n",
|
147 |
+
"upsert_endpoint = f\"{base_url}/vectors/upsert\"\n",
|
148 |
+
"\n",
|
149 |
+
"# Headers with API key\n",
|
150 |
+
"headers = {\n",
|
151 |
+
" \"Content-Type\": \"application/json\",\n",
|
152 |
+
" \"Api-Key\": api_key\n",
|
153 |
+
"}\n",
|
154 |
+
"\n",
|
155 |
+
"dimension = 1536\n",
|
156 |
+
"for embed, doc_id, text in embeddings:\n",
|
157 |
+
" assert len(embed) == dimension, \"Embedding dimension mismatch\"\n",
|
158 |
+
"\n",
|
159 |
+
"# Prepare the data to be added to the index\n",
|
160 |
+
"data = {\n",
|
161 |
+
" \"vectors\": [\n",
|
162 |
+
" {\n",
|
163 |
+
" \"id\": f\"{doc_id}-{i}\", # Unique id for each chunk\n",
|
164 |
+
" \"values\": embed, # Convert numpy array to list\n",
|
165 |
+
" \"metadata\": {\"text\": text[:500]} # Store the first 500 characters of the text for preview\n",
|
166 |
+
" }\n",
|
167 |
+
" for i, (embed, doc_id, text) in enumerate(embeddings)\n",
|
168 |
+
" ]\n",
|
169 |
+
"}\n",
|
170 |
+
"\n",
|
171 |
+
"# Make the POST request to upsert data\n",
|
172 |
+
"response = requests.post(upsert_endpoint, headers=headers, json=data, verify=False)\n",
|
173 |
+
"\n",
|
174 |
+
"# Check if the request was successful\n",
|
175 |
+
"if response.status_code == 200:\n",
|
176 |
+
" print(\"Data upserted successfully.\")\n",
|
177 |
+
"else:\n",
|
178 |
+
" print(f\"Failed to upsert data: {response.status_code} - {response.reason}\")\n",
|
179 |
+
" print(\"Response content:\", response.text)\n",
|
180 |
+
"\n",
|
181 |
+
"# Dictionary to store the full text for each document chunk\n",
|
182 |
+
"doc_text_dict = {f\"{doc_id}-{i}\": text for i, (embed, doc_id, text) in enumerate(embeddings)}\n",
|
183 |
+
"# Preprocess the id to extract a consistent format\n",
|
184 |
+
"# Preprocess the id to extract a consistent format\n",
|
185 |
+
"def preprocess_id(id_str):\n",
|
186 |
+
" if id_str.startswith(\"page_content=\"):\n",
|
187 |
+
" return id_str.split(\"=\")[1].strip(\"'\")\n",
|
188 |
+
" else:\n",
|
189 |
+
" return id_str\n",
|
190 |
+
"\n",
|
191 |
+
"\n",
|
192 |
+
"\n",
|
193 |
+
"# Update the get_similar_docs function to preprocess the id before retrieval\n",
|
194 |
+
"def get_similar_docs(query, k=2, score=False):\n",
|
195 |
+
" # Generate the embedding for the query\n",
|
196 |
+
" query_embedding_response = client.embeddings.create(input=query, model=\"text-embedding-ada-002\")\n",
|
197 |
+
" query_embedding = query_embedding_response.data[0].embedding\n",
|
198 |
+
" \n",
|
199 |
+
" # Search the Pinecone index for similar documents\n",
|
200 |
+
" query_payload = {\n",
|
201 |
+
" \"top_k\": k,\n",
|
202 |
+
" \"include_values\": score,\n",
|
203 |
+
" \"vector\": query_embedding\n",
|
204 |
+
" }\n",
|
205 |
+
" query_endpoint = f\"{base_url}/query\"\n",
|
206 |
+
" query_response = requests.post(query_endpoint, headers=headers, json=query_payload, verify=False)\n",
|
207 |
+
" \n",
|
208 |
+
" # Extract and return the similar documents\n",
|
209 |
+
" if query_response.status_code == 200:\n",
|
210 |
+
" search_results = query_response.json()\n",
|
211 |
+
" print(search_results)\n",
|
212 |
+
" similar_docs = [doc_text_dict[preprocess_id(match['id'])] for match in search_results['matches']]\n",
|
213 |
+
" return similar_docs\n",
|
214 |
+
" else:\n",
|
215 |
+
" print(f\"Failed to retrieve similar documents: {query_response.status_code} - {query_response.reason}\")\n",
|
216 |
+
" print(\"Response content:\", query_response.text)\n",
|
217 |
+
" return []\n",
|
218 |
+
"\n",
|
219 |
+
"\n",
|
220 |
+
"def get_answer(query):\n",
|
221 |
+
" similar_docs = get_similar_docs(query)\n",
|
222 |
+
" combined_message = f\"Question: {query}\\nDocuments: {similar_docs}\"\n",
|
223 |
+
" openai_key = 'c6140592-6c65-4261-a959-2e2ba099526d'\n",
|
224 |
+
" openai_url = \"https://api.nlp.dev.uptimize.merckgroup.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-09-01-preview\"\n",
|
225 |
+
" headers = {\n",
|
226 |
+
" \"Content-Type\": \"application/json\",\n",
|
227 |
+
" \"x-api-key\": openai_key,\n",
|
228 |
+
" \"region\": \"EU\"\n",
|
229 |
+
" }\n",
|
230 |
+
" \n",
|
231 |
+
" payload = {\n",
|
232 |
+
" \"model\": \"gpt-4-turbo\",\n",
|
233 |
+
" \"messages\": [\n",
|
234 |
+
" {\"role\": \"user\", \"content\": combined_message}\n",
|
235 |
+
" ],\n",
|
236 |
+
" \"max_tokens\": 1024,\n",
|
237 |
+
" \"n\": 1,\n",
|
238 |
+
" \"temperature\": 0\n",
|
239 |
+
" }\n",
|
240 |
+
"\n",
|
241 |
+
" try:\n",
|
242 |
+
" response = requests.post(openai_url, headers=headers, data=json.dumps(payload), verify=False)\n",
|
243 |
+
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
244 |
+
" ChatGPT_reply = response.json()[\"choices\"][0][\"message\"][\"content\"]\n",
|
245 |
+
" return ChatGPT_reply\n",
|
246 |
+
" except requests.exceptions.RequestException as e:\n",
|
247 |
+
" print(\"ERROR\")\n",
|
248 |
+
" print(e)\n",
|
249 |
+
" raise Exception(f'Request failed: {e}')\n",
|
250 |
+
"\n",
|
251 |
+
"# Example usage:\n",
|
252 |
+
"query = \"What should be the S3 bucket for final Forecast dump exported from SAC?\"\n",
|
253 |
+
"answer = get_answer(query)\n",
|
254 |
+
"print(answer)\n"
|
255 |
+
]
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"cell_type": "code",
|
259 |
+
"execution_count": null,
|
260 |
+
"id": "a1fc8e0d",
|
261 |
+
"metadata": {},
|
262 |
+
"outputs": [],
|
263 |
+
"source": [
|
264 |
+
"file_key = 'gtn/input/logo_ktbot.png'\n",
|
265 |
+
"\n",
|
266 |
+
"# Download the file from S3\n",
|
267 |
+
"response = s3.get_object(Bucket=bucket_name, Key=file_key)\n",
|
268 |
+
"file_content = response['Body'].read()\n",
|
269 |
+
"\n",
|
270 |
+
"# Encode the file content in base64\n",
|
271 |
+
"png_base64 = base64.b64encode(file_content).decode('utf-8')\n",
|
272 |
+
"png_data_url = f\"data:image/png;base64,{png_base64}\"\n",
|
273 |
+
"\n",
|
274 |
+
"# Print the data URL\n",
|
275 |
+
"print(png_data_url)\n",
|
276 |
+
"\n",
|
277 |
+
"\n",
|
278 |
+
"def message_and_history(input, history):\n",
|
279 |
+
" history = history or []\n",
|
280 |
+
" s = list(sum(history, ()))\n",
|
281 |
+
" s.append(input)\n",
|
282 |
+
" inp = ' '.join(s)\n",
|
283 |
+
" output = get_answer(inp)\n",
|
284 |
+
" history.append((input, output))\n",
|
285 |
+
" return history, history\n",
|
286 |
+
"\n",
|
287 |
+
"\n",
|
288 |
+
"block = gr.Blocks(theme=gr.themes.Soft())\n",
|
289 |
+
"\n",
|
290 |
+
"with block:\n",
|
291 |
+
" with gr.Row():\n",
|
292 |
+
" with gr.Column(scale=1, min_width=300):\n",
|
293 |
+
" gr.Markdown(f\"\"\"\n",
|
294 |
+
" <div style=\"display: flex; align-items: center; justify-content: center; height: calc(100% - 50px);\">\n",
|
295 |
+
" <img src=\"{png_data_url}\" alt=\"Logo\" style=\"height: auto; max-height: 100%;\">\n",
|
296 |
+
" </div>\n",
|
297 |
+
"\n",
|
298 |
+
" \"\"\")\n",
|
299 |
+
" with gr.Column(scale=2):\n",
|
300 |
+
" chatbot = gr.Chatbot(label=\"KT Bot\")\n",
|
301 |
+
" message = gr.Textbox(placeholder=\"Your KT bot, ask me anything...\",label=\"Query\")\n",
|
302 |
+
" state = gr.State()\n",
|
303 |
+
" submit = gr.Button(\"SEND\")\n",
|
304 |
+
" submit.click(message_and_history, \n",
|
305 |
+
" inputs=[message, state], \n",
|
306 |
+
" outputs=[chatbot, state])\n",
|
307 |
+
"\n",
|
308 |
+
"# Apply custom CSS using JavaScript\n",
|
309 |
+
"block.launch(debug=True, inbrowser=True,share=True)\n",
|
310 |
+
"block.load(None, None, None, _js=\"\"\"\n",
|
311 |
+
" (function() {\n",
|
312 |
+
" var style = document.createElement('style');\n",
|
313 |
+
" style.innerHTML = `\n",
|
314 |
+
" #message-box textarea {\n",
|
315 |
+
" border: 1px solid black !important;\n",
|
316 |
+
" }\n",
|
317 |
+
" `;\n",
|
318 |
+
" document.head.appendChild(style);\n",
|
319 |
+
" })();\n",
|
320 |
+
"\"\"\")\n"
|
321 |
+
]
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"cell_type": "code",
|
325 |
+
"execution_count": null,
|
326 |
+
"id": "cebfb55e",
|
327 |
+
"metadata": {},
|
328 |
+
"outputs": [],
|
329 |
+
"source": []
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"cell_type": "code",
|
333 |
+
"execution_count": null,
|
334 |
+
"id": "f8256f50",
|
335 |
+
"metadata": {},
|
336 |
+
"outputs": [],
|
337 |
+
"source": []
|
338 |
+
}
|
339 |
+
],
|
340 |
+
"metadata": {
|
341 |
+
"kernelspec": {
|
342 |
+
"display_name": "conda_python3",
|
343 |
+
"language": "python",
|
344 |
+
"name": "conda_python3"
|
345 |
+
},
|
346 |
+
"language_info": {
|
347 |
+
"codemirror_mode": {
|
348 |
+
"name": "ipython",
|
349 |
+
"version": 3
|
350 |
+
},
|
351 |
+
"file_extension": ".py",
|
352 |
+
"mimetype": "text/x-python",
|
353 |
+
"name": "python",
|
354 |
+
"nbconvert_exporter": "python",
|
355 |
+
"pygments_lexer": "ipython3",
|
356 |
+
"version": "3.10.14"
|
357 |
+
}
|
358 |
+
},
|
359 |
+
"nbformat": 4,
|
360 |
+
"nbformat_minor": 5
|
361 |
+
}
|
.Trash-1000/files/Untitled1.ipynb
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "90983f68",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"#deletion and creation of index\n",
|
11 |
+
"\n",
|
12 |
+
"import requests\n",
|
13 |
+
"\n",
|
14 |
+
"\n",
|
15 |
+
"api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
16 |
+
"base_url = \"https://api.pinecone.io\"\n",
|
17 |
+
"\n",
|
18 |
+
"# Index name\n",
|
19 |
+
"index_name = \"ktbot\"\n",
|
20 |
+
"\n",
|
21 |
+
"# Construct the delete URL for the index\n",
|
22 |
+
"delete_url = f\"{base_url}/indexes/{index_name}\"\n",
|
23 |
+
"\n",
|
24 |
+
"# Headers with API key\n",
|
25 |
+
"headers = {\n",
|
26 |
+
" \"Content-Type\": \"application/json\",\n",
|
27 |
+
" \"Api-Key\": api_key\n",
|
28 |
+
"}\n",
|
29 |
+
"\n",
|
30 |
+
"# Send the DELETE request to delete the existing index\n",
|
31 |
+
"response_delete = requests.delete(delete_url, headers=headers)\n",
|
32 |
+
"print(response_delete.status_code)\n",
|
33 |
+
"# Check the response status\n",
|
34 |
+
"if response_delete.status_code in (204, 202, 404):\n",
|
35 |
+
" print(f\"Index '{index_name}' deleted successfully.\")\n",
|
36 |
+
"else:\n",
|
37 |
+
" print(f\"Failed to delete index '{index_name}': {response_delete.status_code} - {response_delete.text}\")\n",
|
38 |
+
"\n",
|
39 |
+
"\n",
|
40 |
+
"\n",
|
41 |
+
"\n",
|
42 |
+
"# Construct the URL for creating the index\n",
|
43 |
+
"create_index_url = f\"{base_url}/indexes\"\n",
|
44 |
+
"\n",
|
45 |
+
"\n",
|
46 |
+
"# Define the payload for creating the index\n",
|
47 |
+
"payload = {\n",
|
48 |
+
" \"name\": index_name,\n",
|
49 |
+
" \"dimension\": 1536,\n",
|
50 |
+
" \"metric\": \"cosine\",\n",
|
51 |
+
" \"spec\": {\n",
|
52 |
+
" \"serverless\": {\n",
|
53 |
+
" \"cloud\": \"aws\",\n",
|
54 |
+
" \"region\": \"us-east-1\"\n",
|
55 |
+
" }\n",
|
56 |
+
" }\n",
|
57 |
+
"}\n",
|
58 |
+
"\n",
|
59 |
+
"# Send the POST request to create the index\n",
|
60 |
+
"response = requests.post(create_index_url, headers=headers, json=payload)\n",
|
61 |
+
"\n",
|
62 |
+
"# Check the response status\n",
|
63 |
+
"if response.status_code == 201:\n",
|
64 |
+
" print(f\"Index '{index_name}' created successfully.\")\n",
|
65 |
+
" response_json = json.loads(response.text)\n",
|
66 |
+
" host = response_json[\"host\"]\n",
|
67 |
+
" # Print the host\n",
|
68 |
+
" print(\"Host for index 'ktbot':\", host)\n",
|
69 |
+
"\n",
|
70 |
+
"else:\n",
|
71 |
+
" print(f\"Failed to create index '{index_name}': {response.status_code} - {response.text}\")\n"
|
72 |
+
]
|
73 |
+
}
|
74 |
+
],
|
75 |
+
"metadata": {
|
76 |
+
"kernelspec": {
|
77 |
+
"display_name": "conda_python3",
|
78 |
+
"language": "python",
|
79 |
+
"name": "conda_python3"
|
80 |
+
},
|
81 |
+
"language_info": {
|
82 |
+
"codemirror_mode": {
|
83 |
+
"name": "ipython",
|
84 |
+
"version": 3
|
85 |
+
},
|
86 |
+
"file_extension": ".py",
|
87 |
+
"mimetype": "text/x-python",
|
88 |
+
"name": "python",
|
89 |
+
"nbconvert_exporter": "python",
|
90 |
+
"pygments_lexer": "ipython3",
|
91 |
+
"version": "3.10.14"
|
92 |
+
}
|
93 |
+
},
|
94 |
+
"nbformat": 4,
|
95 |
+
"nbformat_minor": 5
|
96 |
+
}
|
.Trash-1000/info/Index_update.ipynb.trashinfo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[Trash Info]
|
2 |
+
Path=Index_update.ipynb
|
3 |
+
DeletionDate=2024-06-07T09:05:49
|
.Trash-1000/info/Pinecone_upsert.ipynb.trashinfo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[Trash Info]
|
2 |
+
Path=Pinecone_upsert.ipynb
|
3 |
+
DeletionDate=2024-06-18T08:33:21
|
.Trash-1000/info/Untitled 1.ipynb.trashinfo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[Trash Info]
|
2 |
+
Path=Untitled.ipynb
|
3 |
+
DeletionDate=2024-06-18T08:33:21
|
.Trash-1000/info/Untitled.ipynb.trashinfo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[Trash Info]
|
2 |
+
Path=Untitled.ipynb
|
3 |
+
DeletionDate=2024-06-07T07:59:19
|
.Trash-1000/info/Untitled1.ipynb.trashinfo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[Trash Info]
|
2 |
+
Path=Untitled1.ipynb
|
3 |
+
DeletionDate=2024-06-07T07:59:19
|
.ipynb_checkpoints/KTBOT-checkpoint.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
KTBOT.ipynb
ADDED
@@ -0,0 +1,1084 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"id": "3cb079ba",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stdout",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"Collecting boto3==1.28.44\n",
|
14 |
+
" Downloading boto3-1.28.44-py3-none-any.whl.metadata (6.7 kB)\n",
|
15 |
+
"Collecting botocore<1.32.0,>=1.31.44 (from boto3==1.28.44)\n",
|
16 |
+
" Downloading botocore-1.31.85-py3-none-any.whl.metadata (6.1 kB)\n",
|
17 |
+
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from boto3==1.28.44) (1.0.1)\n",
|
18 |
+
"Collecting s3transfer<0.7.0,>=0.6.0 (from boto3==1.28.44)\n",
|
19 |
+
" Downloading s3transfer-0.6.2-py3-none-any.whl.metadata (1.8 kB)\n",
|
20 |
+
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from botocore<1.32.0,>=1.31.44->boto3==1.28.44) (2.9.0)\n",
|
21 |
+
"Collecting urllib3<2.1,>=1.25.4 (from botocore<1.32.0,>=1.31.44->boto3==1.28.44)\n",
|
22 |
+
" Downloading urllib3-2.0.7-py3-none-any.whl.metadata (6.6 kB)\n",
|
23 |
+
"Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.32.0,>=1.31.44->boto3==1.28.44) (1.16.0)\n",
|
24 |
+
"Downloading boto3-1.28.44-py3-none-any.whl (135 kB)\n",
|
25 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
26 |
+
"\u001b[?25hDownloading botocore-1.31.85-py3-none-any.whl (11.3 MB)\n",
|
27 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m92.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
|
28 |
+
"\u001b[?25hDownloading s3transfer-0.6.2-py3-none-any.whl (79 kB)\n",
|
29 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
30 |
+
"\u001b[?25hDownloading urllib3-2.0.7-py3-none-any.whl (124 kB)\n",
|
31 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m124.2/124.2 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
32 |
+
"\u001b[?25hInstalling collected packages: urllib3, botocore, s3transfer, boto3\n",
|
33 |
+
" Attempting uninstall: urllib3\n",
|
34 |
+
" Found existing installation: urllib3 2.2.1\n",
|
35 |
+
" Uninstalling urllib3-2.2.1:\n",
|
36 |
+
" Successfully uninstalled urllib3-2.2.1\n",
|
37 |
+
" Attempting uninstall: botocore\n",
|
38 |
+
" Found existing installation: botocore 1.34.101\n",
|
39 |
+
" Uninstalling botocore-1.34.101:\n",
|
40 |
+
" Successfully uninstalled botocore-1.34.101\n",
|
41 |
+
" Attempting uninstall: s3transfer\n",
|
42 |
+
" Found existing installation: s3transfer 0.10.1\n",
|
43 |
+
" Uninstalling s3transfer-0.10.1:\n",
|
44 |
+
" Successfully uninstalled s3transfer-0.10.1\n",
|
45 |
+
" Attempting uninstall: boto3\n",
|
46 |
+
" Found existing installation: boto3 1.34.101\n",
|
47 |
+
" Uninstalling boto3-1.34.101:\n",
|
48 |
+
" Successfully uninstalled boto3-1.34.101\n",
|
49 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
50 |
+
"awscli 1.32.101 requires botocore==1.34.101, but you have botocore 1.31.85 which is incompatible.\n",
|
51 |
+
"awscli 1.32.101 requires s3transfer<0.11.0,>=0.10.0, but you have s3transfer 0.6.2 which is incompatible.\n",
|
52 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
53 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\n",
|
54 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
55 |
+
"\u001b[0mSuccessfully installed boto3-1.28.44 botocore-1.31.85 s3transfer-0.6.2 urllib3-2.0.7\n",
|
56 |
+
"Collecting docx==0.2.4\n",
|
57 |
+
" Downloading docx-0.2.4.tar.gz (54 kB)\n",
|
58 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
59 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
60 |
+
"\u001b[?25hCollecting lxml (from docx==0.2.4)\n",
|
61 |
+
" Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
|
62 |
+
"Requirement already satisfied: Pillow>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from docx==0.2.4) (10.2.0)\n",
|
63 |
+
"Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.0 MB)\n",
|
64 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m55.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
65 |
+
"\u001b[?25hBuilding wheels for collected packages: docx\n",
|
66 |
+
" Building wheel for docx (setup.py) ... \u001b[?25ldone\n",
|
67 |
+
"\u001b[?25h Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=95ee7562fbf76e3f8f45492dfc3ff7802626fe7573e681d40e6bb9e1ee6a85fd\n",
|
68 |
+
" Stored in directory: /home/ec2-user/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767\n",
|
69 |
+
"Successfully built docx\n",
|
70 |
+
"Installing collected packages: lxml, docx\n",
|
71 |
+
"Successfully installed docx-0.2.4 lxml-5.2.2\n",
|
72 |
+
"Collecting httpx==0.26.0 (from httpx[http2]==0.26.0)\n",
|
73 |
+
" Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\n",
|
74 |
+
"Requirement already satisfied: anyio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (4.3.0)\n",
|
75 |
+
"Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (2024.2.2)\n",
|
76 |
+
"Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (1.0.4)\n",
|
77 |
+
"Requirement already satisfied: idna in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (3.6)\n",
|
78 |
+
"Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (1.3.1)\n",
|
79 |
+
"Requirement already satisfied: h2<5,>=3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx[http2]==0.26.0) (4.1.0)\n",
|
80 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx==0.26.0->httpx[http2]==0.26.0) (0.14.0)\n",
|
81 |
+
"Requirement already satisfied: hyperframe<7,>=6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]==0.26.0) (6.0.1)\n",
|
82 |
+
"Requirement already satisfied: hpack<5,>=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]==0.26.0) (4.0.0)\n",
|
83 |
+
"Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx==0.26.0->httpx[http2]==0.26.0) (1.2.0)\n",
|
84 |
+
"Requirement already satisfied: typing-extensions>=4.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx==0.26.0->httpx[http2]==0.26.0) (4.10.0)\n",
|
85 |
+
"Downloading httpx-0.26.0-py3-none-any.whl (75 kB)\n",
|
86 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
87 |
+
"\u001b[?25hInstalling collected packages: httpx\n",
|
88 |
+
" Attempting uninstall: httpx\n",
|
89 |
+
" Found existing installation: httpx 0.27.0\n",
|
90 |
+
" Uninstalling httpx-0.27.0:\n",
|
91 |
+
" Successfully uninstalled httpx-0.27.0\n",
|
92 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
93 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\u001b[0m\u001b[31m\n",
|
94 |
+
"\u001b[0mSuccessfully installed httpx-0.26.0\n",
|
95 |
+
"Collecting langchain==0.2.0\n",
|
96 |
+
" Downloading langchain-0.2.0-py3-none-any.whl.metadata (13 kB)\n",
|
97 |
+
"Requirement already satisfied: PyYAML>=5.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (6.0.1)\n",
|
98 |
+
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (2.0.29)\n",
|
99 |
+
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (3.9.3)\n",
|
100 |
+
"Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (4.0.3)\n",
|
101 |
+
"Collecting dataclasses-json<0.7,>=0.5.7 (from langchain==0.2.0)\n",
|
102 |
+
" Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n",
|
103 |
+
"Collecting langchain-core<0.3.0,>=0.2.0 (from langchain==0.2.0)\n",
|
104 |
+
" Downloading langchain_core-0.2.8-py3-none-any.whl.metadata (5.8 kB)\n",
|
105 |
+
"Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain==0.2.0)\n",
|
106 |
+
" Downloading langchain_text_splitters-0.2.1-py3-none-any.whl.metadata (2.2 kB)\n"
|
107 |
+
]
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"name": "stdout",
|
111 |
+
"output_type": "stream",
|
112 |
+
"text": [
|
113 |
+
"Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.2.0)\n",
|
114 |
+
" Downloading langsmith-0.1.79-py3-none-any.whl.metadata (13 kB)\n",
|
115 |
+
"Requirement already satisfied: numpy<2,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (1.22.4)\n",
|
116 |
+
"Collecting pydantic<3,>=1 (from langchain==0.2.0)\n",
|
117 |
+
" Downloading pydantic-2.7.4-py3-none-any.whl.metadata (109 kB)\n",
|
118 |
+
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m109.4/109.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
119 |
+
"\u001b[?25hRequirement already satisfied: requests<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (2.31.0)\n",
|
120 |
+
"Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (8.2.3)\n",
|
121 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.3.1)\n",
|
122 |
+
"Requirement already satisfied: attrs>=17.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (23.2.0)\n",
|
123 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.4.1)\n",
|
124 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (6.0.5)\n",
|
125 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.9.4)\n",
|
126 |
+
"Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain==0.2.0)\n",
|
127 |
+
" Downloading marshmallow-3.21.3-py3-none-any.whl.metadata (7.1 kB)\n",
|
128 |
+
"Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain==0.2.0)\n",
|
129 |
+
" Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n",
|
130 |
+
"Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain==0.2.0)\n",
|
131 |
+
" Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\n",
|
132 |
+
"Collecting packaging<25,>=23.2 (from langchain-core<0.3.0,>=0.2.0->langchain==0.2.0)\n",
|
133 |
+
" Downloading packaging-24.1-py3-none-any.whl.metadata (3.2 kB)\n",
|
134 |
+
"Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain==0.2.0)\n",
|
135 |
+
" Downloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n",
|
136 |
+
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m49.7/49.7 kB\u001b[0m \u001b[31m541.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
137 |
+
"\u001b[?25hCollecting annotated-types>=0.4.0 (from pydantic<3,>=1->langchain==0.2.0)\n",
|
138 |
+
" Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
|
139 |
+
"Collecting pydantic-core==2.18.4 (from pydantic<3,>=1->langchain==0.2.0)\n",
|
140 |
+
" Downloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n",
|
141 |
+
"Requirement already satisfied: typing-extensions>=4.6.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain==0.2.0) (4.10.0)\n",
|
142 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (3.3.2)\n",
|
143 |
+
"Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (3.6)\n",
|
144 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (2.0.7)\n",
|
145 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (2024.2.2)\n",
|
146 |
+
"Requirement already satisfied: greenlet!=0.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain==0.2.0) (3.0.3)\n",
|
147 |
+
"Requirement already satisfied: jsonpointer>=1.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain==0.2.0) (2.4)\n",
|
148 |
+
"Requirement already satisfied: mypy-extensions>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain==0.2.0) (1.0.0)\n",
|
149 |
+
"Downloading langchain-0.2.0-py3-none-any.whl (973 kB)\n",
|
150 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m973.7/973.7 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
151 |
+
"\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
|
152 |
+
"Downloading langchain_core-0.2.8-py3-none-any.whl (315 kB)\n",
|
153 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m315.8/315.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
|
154 |
+
"\u001b[?25hDownloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)\n",
|
155 |
+
"Downloading langsmith-0.1.79-py3-none-any.whl (125 kB)\n",
|
156 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m125.3/125.3 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
157 |
+
"\u001b[?25hDownloading pydantic-2.7.4-py3-none-any.whl (409 kB)\n",
|
158 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m409.0/409.0 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
159 |
+
"\u001b[?25hDownloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
|
160 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
161 |
+
"\u001b[?25hDownloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
|
162 |
+
"Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
|
163 |
+
"Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)\n",
|
164 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
165 |
+
"\u001b[?25hDownloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n",
|
166 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m145.0/145.0 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
167 |
+
"\u001b[?25hDownloading packaging-24.1-py3-none-any.whl (53 kB)\n",
|
168 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
169 |
+
"\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
|
170 |
+
"Installing collected packages: typing-inspect, pydantic-core, packaging, orjson, jsonpatch, annotated-types, pydantic, marshmallow, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain\n",
|
171 |
+
" Attempting uninstall: packaging\n",
|
172 |
+
" Found existing installation: packaging 21.3\n",
|
173 |
+
" Uninstalling packaging-21.3:\n",
|
174 |
+
" Successfully uninstalled packaging-21.3\n",
|
175 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
176 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
177 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\n",
|
178 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
179 |
+
"\u001b[0mSuccessfully installed annotated-types-0.7.0 dataclasses-json-0.6.7 jsonpatch-1.33 langchain-0.2.0 langchain-core-0.2.8 langchain-text-splitters-0.2.1 langsmith-0.1.79 marshmallow-3.21.3 orjson-3.10.5 packaging-24.1 pydantic-2.7.4 pydantic-core-2.18.4 typing-inspect-0.9.0\n",
|
180 |
+
"Collecting openai==1.30.1\n",
|
181 |
+
" Downloading openai-1.30.1-py3-none-any.whl.metadata (21 kB)\n",
|
182 |
+
"Requirement already satisfied: anyio<5,>=3.5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.3.0)\n",
|
183 |
+
"Collecting distro<2,>=1.7.0 (from openai==1.30.1)\n",
|
184 |
+
" Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
|
185 |
+
"Requirement already satisfied: httpx<1,>=0.23.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (0.26.0)\n",
|
186 |
+
"Requirement already satisfied: pydantic<3,>=1.9.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (2.7.4)\n",
|
187 |
+
"Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (1.3.1)\n",
|
188 |
+
"Requirement already satisfied: tqdm>4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.66.2)\n",
|
189 |
+
"Requirement already satisfied: typing-extensions<5,>=4.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.10.0)\n"
|
190 |
+
]
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"name": "stdout",
|
194 |
+
"output_type": "stream",
|
195 |
+
"text": [
|
196 |
+
"Requirement already satisfied: idna>=2.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai==1.30.1) (3.6)\n",
|
197 |
+
"Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai==1.30.1) (1.2.0)\n",
|
198 |
+
"Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai==1.30.1) (2024.2.2)\n",
|
199 |
+
"Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai==1.30.1) (1.0.4)\n",
|
200 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai==1.30.1) (0.14.0)\n",
|
201 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai==1.30.1) (0.7.0)\n",
|
202 |
+
"Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai==1.30.1) (2.18.4)\n",
|
203 |
+
"Downloading openai-1.30.1-py3-none-any.whl (320 kB)\n",
|
204 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
205 |
+
"\u001b[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
|
206 |
+
"Installing collected packages: distro, openai\n",
|
207 |
+
"Successfully installed distro-1.9.0 openai-1.30.1\n",
|
208 |
+
"Collecting pandas==2.2.2\n",
|
209 |
+
" Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n",
|
210 |
+
"Requirement already satisfied: numpy>=1.22.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (1.22.4)\n",
|
211 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2.9.0)\n",
|
212 |
+
"Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2024.1)\n",
|
213 |
+
"Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2024.1)\n",
|
214 |
+
"Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas==2.2.2) (1.16.0)\n",
|
215 |
+
"Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n",
|
216 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
217 |
+
"\u001b[?25hInstalling collected packages: pandas\n",
|
218 |
+
" Attempting uninstall: pandas\n",
|
219 |
+
" Found existing installation: pandas 2.2.1\n",
|
220 |
+
" Uninstalling pandas-2.2.1:\n",
|
221 |
+
" Successfully uninstalled pandas-2.2.1\n",
|
222 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
223 |
+
"autovizwidget 0.21.0 requires pandas<2.0.0,>=0.20.1, but you have pandas 2.2.2 which is incompatible.\n",
|
224 |
+
"hdijupyterutils 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
|
225 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
226 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\u001b[0m\u001b[31m\n",
|
227 |
+
"\u001b[0mSuccessfully installed pandas-2.2.2\n",
|
228 |
+
"Collecting pinecone-client==4.1.0\n",
|
229 |
+
" Downloading pinecone_client-4.1.0-py3-none-any.whl.metadata (16 kB)\n",
|
230 |
+
"Requirement already satisfied: certifi>=2019.11.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (2024.2.2)\n",
|
231 |
+
"Requirement already satisfied: tqdm>=4.64.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (4.66.2)\n",
|
232 |
+
"Requirement already satisfied: typing-extensions>=3.7.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (4.10.0)\n",
|
233 |
+
"Requirement already satisfied: urllib3>=1.26.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (2.0.7)\n",
|
234 |
+
"Downloading pinecone_client-4.1.0-py3-none-any.whl (215 kB)\n",
|
235 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m215.5/215.5 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
236 |
+
"\u001b[?25hInstalling collected packages: pinecone-client\n",
|
237 |
+
"Successfully installed pinecone-client-4.1.0\n",
|
238 |
+
"Collecting streamlit==1.32.2\n",
|
239 |
+
" Downloading streamlit-1.32.2-py2.py3-none-any.whl.metadata (8.5 kB)\n",
|
240 |
+
"Collecting altair<6,>=4.0 (from streamlit==1.32.2)\n",
|
241 |
+
" Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)\n",
|
242 |
+
"Requirement already satisfied: blinker<2,>=1.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (1.7.0)\n",
|
243 |
+
"Collecting cachetools<6,>=4.0 (from streamlit==1.32.2)\n",
|
244 |
+
" Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)\n",
|
245 |
+
"Requirement already satisfied: click<9,>=7.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (8.1.7)\n",
|
246 |
+
"Requirement already satisfied: numpy<2,>=1.19.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (1.22.4)\n",
|
247 |
+
"Collecting packaging<24,>=16.8 (from streamlit==1.32.2)\n",
|
248 |
+
" Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\n",
|
249 |
+
"Requirement already satisfied: pandas<3,>=1.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (2.2.2)\n",
|
250 |
+
"Requirement already satisfied: pillow<11,>=7.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (10.2.0)\n",
|
251 |
+
"Requirement already satisfied: protobuf<5,>=3.20 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.25.3)\n",
|
252 |
+
"Requirement already satisfied: pyarrow>=7.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (15.0.2)\n",
|
253 |
+
"Requirement already satisfied: requests<3,>=2.27 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (2.31.0)\n",
|
254 |
+
"Requirement already satisfied: rich<14,>=10.14.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (13.7.1)\n",
|
255 |
+
"Requirement already satisfied: tenacity<9,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (8.2.3)\n",
|
256 |
+
"Requirement already satisfied: toml<2,>=0.10.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (0.10.2)\n",
|
257 |
+
"Requirement already satisfied: typing-extensions<5,>=4.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.10.0)\n",
|
258 |
+
"Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit==1.32.2)\n",
|
259 |
+
" Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n",
|
260 |
+
"Collecting pydeck<1,>=0.8.0b4 (from streamlit==1.32.2)\n",
|
261 |
+
" Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n",
|
262 |
+
"Requirement already satisfied: tornado<7,>=6.0.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (6.4)\n",
|
263 |
+
"Requirement already satisfied: watchdog>=2.1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.0.0)\n",
|
264 |
+
"Requirement already satisfied: jinja2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (3.1.3)\n",
|
265 |
+
"Requirement already satisfied: jsonschema>=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (4.21.1)\n",
|
266 |
+
"Requirement already satisfied: toolz in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (0.12.1)\n",
|
267 |
+
"Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit==1.32.2)\n",
|
268 |
+
" Using cached gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n",
|
269 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2.9.0)\n",
|
270 |
+
"Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2024.1)\n",
|
271 |
+
"Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2024.1)\n"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"name": "stdout",
|
276 |
+
"output_type": "stream",
|
277 |
+
"text": [
|
278 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (3.3.2)\n",
|
279 |
+
"Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (3.6)\n",
|
280 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (2.0.7)\n",
|
281 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (2024.2.2)\n",
|
282 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich<14,>=10.14.0->streamlit==1.32.2) (3.0.0)\n",
|
283 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich<14,>=10.14.0->streamlit==1.32.2) (2.17.2)\n",
|
284 |
+
"Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit==1.32.2)\n",
|
285 |
+
" Using cached smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n",
|
286 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jinja2->altair<6,>=4.0->streamlit==1.32.2) (2.1.5)\n",
|
287 |
+
"Requirement already satisfied: attrs>=22.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (23.2.0)\n",
|
288 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (2023.12.1)\n",
|
289 |
+
"Requirement already satisfied: referencing>=0.28.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (0.34.0)\n",
|
290 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (0.18.0)\n",
|
291 |
+
"Requirement already satisfied: mdurl~=0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit==1.32.2) (0.1.2)\n",
|
292 |
+
"Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas<3,>=1.3.0->streamlit==1.32.2) (1.16.0)\n",
|
293 |
+
"Downloading streamlit-1.32.2-py2.py3-none-any.whl (8.1 MB)\n",
|
294 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m8.1/8.1 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
295 |
+
"\u001b[?25hDownloading altair-5.3.0-py3-none-any.whl (857 kB)\n",
|
296 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m857.8/857.8 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
|
297 |
+
"\u001b[?25hDownloading cachetools-5.3.3-py3-none-any.whl (9.3 kB)\n",
|
298 |
+
"Using cached GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
|
299 |
+
"Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
|
300 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
301 |
+
"\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n",
|
302 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
303 |
+
"\u001b[?25hUsing cached gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
|
304 |
+
"Using cached smmap-5.0.1-py3-none-any.whl (24 kB)\n",
|
305 |
+
"Installing collected packages: smmap, packaging, cachetools, pydeck, gitdb, gitpython, altair, streamlit\n",
|
306 |
+
" Attempting uninstall: packaging\n",
|
307 |
+
" Found existing installation: packaging 24.1\n",
|
308 |
+
" Uninstalling packaging-24.1:\n",
|
309 |
+
" Successfully uninstalled packaging-24.1\n",
|
310 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
311 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
312 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
|
313 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
314 |
+
"\u001b[0mSuccessfully installed altair-5.3.0 cachetools-5.3.3 gitdb-4.0.11 gitpython-3.1.43 packaging-23.2 pydeck-0.9.1 smmap-5.0.1 streamlit-1.32.2\n",
|
315 |
+
"Collecting requests==2.28.1\n",
|
316 |
+
" Downloading requests-2.28.1-py3-none-any.whl.metadata (4.6 kB)\n",
|
317 |
+
"Collecting charset-normalizer<3,>=2 (from requests==2.28.1)\n",
|
318 |
+
" Downloading charset_normalizer-2.1.1-py3-none-any.whl.metadata (11 kB)\n",
|
319 |
+
"Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests==2.28.1) (3.6)\n",
|
320 |
+
"Collecting urllib3<1.27,>=1.21.1 (from requests==2.28.1)\n",
|
321 |
+
" Downloading urllib3-1.26.19-py2.py3-none-any.whl.metadata (49 kB)\n",
|
322 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
323 |
+
"\u001b[?25hRequirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests==2.28.1) (2024.2.2)\n",
|
324 |
+
"Downloading requests-2.28.1-py3-none-any.whl (62 kB)\n",
|
325 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
326 |
+
"\u001b[?25hDownloading charset_normalizer-2.1.1-py3-none-any.whl (39 kB)\n",
|
327 |
+
"Downloading urllib3-1.26.19-py2.py3-none-any.whl (143 kB)\n",
|
328 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m143.9/143.9 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
329 |
+
"\u001b[?25hInstalling collected packages: urllib3, charset-normalizer, requests\n",
|
330 |
+
" Attempting uninstall: urllib3\n",
|
331 |
+
" Found existing installation: urllib3 2.0.7\n",
|
332 |
+
" Uninstalling urllib3-2.0.7:\n",
|
333 |
+
" Successfully uninstalled urllib3-2.0.7\n",
|
334 |
+
" Attempting uninstall: charset-normalizer\n",
|
335 |
+
" Found existing installation: charset-normalizer 3.3.2\n",
|
336 |
+
" Uninstalling charset-normalizer-3.3.2:\n",
|
337 |
+
" Successfully uninstalled charset-normalizer-3.3.2\n",
|
338 |
+
" Attempting uninstall: requests\n",
|
339 |
+
" Found existing installation: requests 2.31.0\n",
|
340 |
+
" Uninstalling requests-2.31.0:\n",
|
341 |
+
" Successfully uninstalled requests-2.31.0\n",
|
342 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
343 |
+
"jupyterlab-server 2.25.4 requires requests>=2.31, but you have requests 2.28.1 which is incompatible.\n",
|
344 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
345 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
|
346 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
347 |
+
"\u001b[0mSuccessfully installed charset-normalizer-2.1.1 requests-2.28.1 urllib3-1.26.19\n",
|
348 |
+
"Collecting python-docx==0.8.11\n",
|
349 |
+
" Downloading python-docx-0.8.11.tar.gz (5.6 MB)\n",
|
350 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m25.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
351 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
352 |
+
"\u001b[?25hRequirement already satisfied: lxml>=2.3.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-docx==0.8.11) (5.2.2)\n",
|
353 |
+
"Building wheels for collected packages: python-docx\n",
|
354 |
+
" Building wheel for python-docx (setup.py) ... \u001b[?25ldone\n",
|
355 |
+
"\u001b[?25h Created wheel for python-docx: filename=python_docx-0.8.11-py3-none-any.whl size=184488 sha256=1beea9ffacd183f8637c48e55e4ecb5a01557173f2b87cf7bee13b186b97942e\n",
|
356 |
+
" Stored in directory: /home/ec2-user/.cache/pip/wheels/80/27/06/837436d4c3bd989b957a91679966f207bfd71d358d63a8194d\n",
|
357 |
+
"Successfully built python-docx\n",
|
358 |
+
"Installing collected packages: python-docx\n",
|
359 |
+
"Successfully installed python-docx-0.8.11\n",
|
360 |
+
"Collecting langchain-community\n",
|
361 |
+
" Downloading langchain_community-0.2.5-py3-none-any.whl.metadata (2.5 kB)\n",
|
362 |
+
"Requirement already satisfied: PyYAML>=5.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (6.0.1)\n",
|
363 |
+
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (2.0.29)\n",
|
364 |
+
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (3.9.3)\n",
|
365 |
+
"Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.6.7)\n"
|
366 |
+
]
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"name": "stdout",
|
370 |
+
"output_type": "stream",
|
371 |
+
"text": [
|
372 |
+
"Collecting langchain<0.3.0,>=0.2.5 (from langchain-community)\n",
|
373 |
+
" Downloading langchain-0.2.5-py3-none-any.whl.metadata (7.0 kB)\n",
|
374 |
+
"Requirement already satisfied: langchain-core<0.3.0,>=0.2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.2.8)\n",
|
375 |
+
"Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.1.79)\n",
|
376 |
+
"Requirement already satisfied: numpy<2,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (1.22.4)\n",
|
377 |
+
"Requirement already satisfied: requests<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (2.28.1)\n",
|
378 |
+
"Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (8.2.3)\n",
|
379 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n",
|
380 |
+
"Requirement already satisfied: attrs>=17.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n",
|
381 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n",
|
382 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n",
|
383 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n",
|
384 |
+
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n",
|
385 |
+
"Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.21.3)\n",
|
386 |
+
"Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
|
387 |
+
"Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain<0.3.0,>=0.2.5->langchain-community) (0.2.1)\n",
|
388 |
+
"Requirement already satisfied: pydantic<3,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain<0.3.0,>=0.2.5->langchain-community) (2.7.4)\n",
|
389 |
+
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-core<0.3.0,>=0.2.7->langchain-community) (1.33)\n",
|
390 |
+
"Requirement already satisfied: packaging<25,>=23.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-core<0.3.0,>=0.2.7->langchain-community) (23.2)\n",
|
391 |
+
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.0->langchain-community) (3.10.5)\n",
|
392 |
+
"Requirement already satisfied: charset-normalizer<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2.1.1)\n",
|
393 |
+
"Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (3.6)\n",
|
394 |
+
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (1.26.19)\n",
|
395 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2024.2.2)\n",
|
396 |
+
"Requirement already satisfied: typing-extensions>=4.6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain-community) (4.10.0)\n",
|
397 |
+
"Requirement already satisfied: greenlet!=0.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n",
|
398 |
+
"Requirement already satisfied: jsonpointer>=1.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.7->langchain-community) (2.4)\n",
|
399 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.5->langchain-community) (0.7.0)\n",
|
400 |
+
"Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.5->langchain-community) (2.18.4)\n",
|
401 |
+
"Requirement already satisfied: mypy-extensions>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n",
|
402 |
+
"Downloading langchain_community-0.2.5-py3-none-any.whl (2.2 MB)\n",
|
403 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
404 |
+
"\u001b[?25hDownloading langchain-0.2.5-py3-none-any.whl (974 kB)\n",
|
405 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m974.6/974.6 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
|
406 |
+
"\u001b[?25hInstalling collected packages: langchain, langchain-community\n",
|
407 |
+
" Attempting uninstall: langchain\n",
|
408 |
+
" Found existing installation: langchain 0.2.0\n",
|
409 |
+
" Uninstalling langchain-0.2.0:\n",
|
410 |
+
" Successfully uninstalled langchain-0.2.0\n",
|
411 |
+
"Successfully installed langchain-0.2.5 langchain-community-0.2.5\n",
|
412 |
+
"Collecting gradio\n",
|
413 |
+
" Downloading gradio-4.36.1-py3-none-any.whl.metadata (15 kB)\n",
|
414 |
+
"Collecting aiofiles<24.0,>=22.0 (from gradio)\n",
|
415 |
+
" Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n",
|
416 |
+
"Requirement already satisfied: altair<6.0,>=4.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (5.3.0)\n",
|
417 |
+
"Collecting fastapi (from gradio)\n",
|
418 |
+
" Downloading fastapi-0.111.0-py3-none-any.whl.metadata (25 kB)\n",
|
419 |
+
"Collecting ffmpy (from gradio)\n",
|
420 |
+
" Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n",
|
421 |
+
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
422 |
+
"\u001b[?25hCollecting gradio-client==1.0.1 (from gradio)\n",
|
423 |
+
" Downloading gradio_client-1.0.1-py3-none-any.whl.metadata (7.1 kB)\n",
|
424 |
+
"Requirement already satisfied: httpx>=0.24.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (0.26.0)\n",
|
425 |
+
"Collecting huggingface-hub>=0.19.3 (from gradio)\n",
|
426 |
+
" Downloading huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)\n",
|
427 |
+
"Requirement already satisfied: importlib-resources<7.0,>=1.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (6.4.0)\n",
|
428 |
+
"Requirement already satisfied: jinja2<4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.1.3)\n",
|
429 |
+
"Requirement already satisfied: markupsafe~=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.1.5)\n",
|
430 |
+
"Requirement already satisfied: matplotlib~=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.8.3)\n",
|
431 |
+
"Requirement already satisfied: numpy<3.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (1.22.4)\n",
|
432 |
+
"Requirement already satisfied: orjson~=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.10.5)\n",
|
433 |
+
"Requirement already satisfied: packaging in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (23.2)\n",
|
434 |
+
"Requirement already satisfied: pandas<3.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.2.2)\n",
|
435 |
+
"Requirement already satisfied: pillow<11.0,>=8.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (10.2.0)\n",
|
436 |
+
"Requirement already satisfied: pydantic>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.7.4)\n",
|
437 |
+
"Collecting pydub (from gradio)\n",
|
438 |
+
" Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
439 |
+
"Collecting python-multipart>=0.0.9 (from gradio)\n",
|
440 |
+
" Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n"
|
441 |
+
]
|
442 |
+
},
|
443 |
+
{
|
444 |
+
"name": "stdout",
|
445 |
+
"output_type": "stream",
|
446 |
+
"text": [
|
447 |
+
"Requirement already satisfied: pyyaml<7.0,>=5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (6.0.1)\n",
|
448 |
+
"Collecting ruff>=0.2.2 (from gradio)\n",
|
449 |
+
" Downloading ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)\n",
|
450 |
+
"Collecting semantic-version~=2.0 (from gradio)\n",
|
451 |
+
" Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
|
452 |
+
"Collecting tomlkit==0.12.0 (from gradio)\n",
|
453 |
+
" Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
|
454 |
+
"Collecting typer<1.0,>=0.12 (from gradio)\n",
|
455 |
+
" Downloading typer-0.12.3-py3-none-any.whl.metadata (15 kB)\n",
|
456 |
+
"Requirement already satisfied: typing-extensions~=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (4.10.0)\n",
|
457 |
+
"Collecting urllib3~=2.0 (from gradio)\n",
|
458 |
+
" Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)\n",
|
459 |
+
"Collecting uvicorn>=0.14.0 (from gradio)\n",
|
460 |
+
" Downloading uvicorn-0.30.1-py3-none-any.whl.metadata (6.3 kB)\n",
|
461 |
+
"Requirement already satisfied: fsspec in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio-client==1.0.1->gradio) (2024.3.1)\n",
|
462 |
+
"Collecting websockets<12.0,>=10.0 (from gradio-client==1.0.1->gradio)\n",
|
463 |
+
" Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
|
464 |
+
"Requirement already satisfied: jsonschema>=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio) (4.21.1)\n",
|
465 |
+
"Requirement already satisfied: toolz in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n",
|
466 |
+
"Requirement already satisfied: anyio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (4.3.0)\n",
|
467 |
+
"Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (2024.2.2)\n",
|
468 |
+
"Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (1.0.4)\n",
|
469 |
+
"Requirement already satisfied: idna in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (3.6)\n",
|
470 |
+
"Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (1.3.1)\n",
|
471 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n",
|
472 |
+
"Requirement already satisfied: filelock in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (3.13.3)\n",
|
473 |
+
"Requirement already satisfied: requests in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (2.28.1)\n",
|
474 |
+
"Requirement already satisfied: tqdm>=4.42.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (4.66.2)\n",
|
475 |
+
"Requirement already satisfied: contourpy>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (1.2.0)\n",
|
476 |
+
"Requirement already satisfied: cycler>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (0.12.1)\n",
|
477 |
+
"Requirement already satisfied: fonttools>=4.22.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (4.50.0)\n",
|
478 |
+
"Requirement already satisfied: kiwisolver>=1.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
|
479 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (3.1.2)\n",
|
480 |
+
"Requirement already satisfied: python-dateutil>=2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (2.9.0)\n",
|
481 |
+
"Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
|
482 |
+
"Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
|
483 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2.0->gradio) (0.7.0)\n",
|
484 |
+
"Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2.0->gradio) (2.18.4)\n",
|
485 |
+
"Requirement already satisfied: click>=8.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n",
|
486 |
+
"Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n",
|
487 |
+
" Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\n",
|
488 |
+
"Requirement already satisfied: rich>=10.11.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n",
|
489 |
+
"Collecting starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n",
|
490 |
+
" Downloading starlette-0.37.2-py3-none-any.whl.metadata (5.9 kB)\n",
|
491 |
+
"Collecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n",
|
492 |
+
" Downloading fastapi_cli-0.0.4-py3-none-any.whl.metadata (7.0 kB)\n",
|
493 |
+
"Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from fastapi->gradio) (5.9.0)\n",
|
494 |
+
"Collecting email_validator>=2.0.0 (from fastapi->gradio)\n",
|
495 |
+
" Downloading email_validator-2.1.2-py3-none-any.whl.metadata (26 kB)\n",
|
496 |
+
"Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n",
|
497 |
+
" Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)\n",
|
498 |
+
"Requirement already satisfied: attrs>=22.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.2.0)\n",
|
499 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n",
|
500 |
+
"Requirement already satisfied: referencing>=0.28.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.34.0)\n",
|
501 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.0)\n",
|
502 |
+
"Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n",
|
503 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
|
504 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.17.2)\n",
|
505 |
+
"Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx>=0.24.1->gradio) (1.2.0)\n",
|
506 |
+
"Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
|
507 |
+
" Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
|
508 |
+
"Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
|
509 |
+
" Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n",
|
510 |
+
"Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
|
511 |
+
" Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
|
512 |
+
"Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
|
513 |
+
" Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
|
514 |
+
"Requirement already satisfied: charset-normalizer<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests->huggingface-hub>=0.19.3->gradio) (2.1.1)\n",
|
515 |
+
"INFO: pip is looking at multiple versions of requests to determine which version is compatible with other requirements. This could take a while.\n"
|
516 |
+
]
|
517 |
+
},
|
518 |
+
{
|
519 |
+
"name": "stdout",
|
520 |
+
"output_type": "stream",
|
521 |
+
"text": [
|
522 |
+
"Collecting requests (from huggingface-hub>=0.19.3->gradio)\n",
|
523 |
+
" Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
|
524 |
+
"Requirement already satisfied: mdurl~=0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n",
|
525 |
+
"Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)\n",
|
526 |
+
"\u001b[2K \u001b[90mββββββββββββοΏ½οΏ½βββββββββββββββββββββββββββ\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
|
527 |
+
"\u001b[?25hDownloading gradio_client-1.0.1-py3-none-any.whl (318 kB)\n",
|
528 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m318.1/318.1 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
529 |
+
"\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
|
530 |
+
"Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
|
531 |
+
"Downloading huggingface_hub-0.23.4-py3-none-any.whl (402 kB)\n",
|
532 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m402.6/402.6 kB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
533 |
+
"\u001b[?25hDownloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n",
|
534 |
+
"Downloading ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.8 MB)\n",
|
535 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m89.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
536 |
+
"\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
|
537 |
+
"Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
|
538 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m767.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
539 |
+
"\u001b[?25hDownloading urllib3-2.2.2-py3-none-any.whl (121 kB)\n",
|
540 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
541 |
+
"\u001b[?25hDownloading uvicorn-0.30.1-py3-none-any.whl (62 kB)\n",
|
542 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
543 |
+
"\u001b[?25hDownloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n",
|
544 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
545 |
+
"\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
|
546 |
+
"Downloading email_validator-2.1.2-py3-none-any.whl (30 kB)\n",
|
547 |
+
"Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n",
|
548 |
+
"Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
|
549 |
+
"Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n",
|
550 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
551 |
+
"\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
|
552 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
553 |
+
"\u001b[?25hDownloading requests-2.32.3-py3-none-any.whl (64 kB)\n",
|
554 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
555 |
+
"\u001b[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n",
|
556 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
557 |
+
"\u001b[?25hDownloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n",
|
558 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
559 |
+
"\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
|
560 |
+
"Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
|
561 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m27.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
562 |
+
"\u001b[?25hDownloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
|
563 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
564 |
+
"\u001b[?25hBuilding wheels for collected packages: ffmpy\n",
|
565 |
+
" Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\n",
|
566 |
+
"\u001b[?25h Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=0397b4aac7e8bc2cd10586f4161b48a2c33365d31a941eaaf05df08b9e661664\n",
|
567 |
+
" Stored in directory: /home/ec2-user/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n",
|
568 |
+
"Successfully built ffmpy\n",
|
569 |
+
"Installing collected packages: pydub, ffmpy, websockets, uvloop, uvicorn, urllib3, tomlkit, shellingham, semantic-version, ruff, python-multipart, python-dotenv, httptools, dnspython, aiofiles, watchfiles, starlette, requests, email_validator, typer, huggingface-hub, gradio-client, fastapi-cli, fastapi, gradio\n",
|
570 |
+
" Attempting uninstall: urllib3\n",
|
571 |
+
" Found existing installation: urllib3 1.26.19\n",
|
572 |
+
" Uninstalling urllib3-1.26.19:\n",
|
573 |
+
" Successfully uninstalled urllib3-1.26.19\n",
|
574 |
+
" Attempting uninstall: tomlkit\n",
|
575 |
+
" Found existing installation: tomlkit 0.12.4\n",
|
576 |
+
" Uninstalling tomlkit-0.12.4:\n",
|
577 |
+
" Successfully uninstalled tomlkit-0.12.4\n",
|
578 |
+
" Attempting uninstall: requests\n",
|
579 |
+
" Found existing installation: requests 2.28.1\n",
|
580 |
+
" Uninstalling requests-2.28.1:\n",
|
581 |
+
" Successfully uninstalled requests-2.28.1\n",
|
582 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
583 |
+
"botocore 1.31.85 requires urllib3<2.1,>=1.25.4; python_version >= \"3.10\", but you have urllib3 2.2.2 which is incompatible.\n",
|
584 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
585 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
|
586 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
587 |
+
"\u001b[0mSuccessfully installed aiofiles-23.2.1 dnspython-2.6.1 email_validator-2.1.2 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 gradio-4.36.1 gradio-client-1.0.1 httptools-0.6.1 huggingface-hub-0.23.4 pydub-0.25.1 python-dotenv-1.0.1 python-multipart-0.0.9 requests-2.32.3 ruff-0.4.9 semantic-version-2.10.0 shellingham-1.5.4 starlette-0.37.2 tomlkit-0.12.0 typer-0.12.3 urllib3-2.2.2 uvicorn-0.30.1 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n"
|
588 |
+
]
|
589 |
+
}
|
590 |
+
],
|
591 |
+
"source": [
|
592 |
+
"!pip install boto3==1.28.44\n",
|
593 |
+
"!pip install docx==0.2.4\n",
|
594 |
+
"!pip install httpx[http2]==0.26.0\n",
|
595 |
+
"!pip install langchain==0.2.0\n",
|
596 |
+
"!pip install openai==1.30.1\n",
|
597 |
+
"!pip install pandas==2.2.2\n",
|
598 |
+
"!pip install pinecone-client==4.1.0\n",
|
599 |
+
"!pip install streamlit==1.32.2\n",
|
600 |
+
"!pip install requests==2.28.1\n",
|
601 |
+
"!pip install python-docx==0.8.11\n",
|
602 |
+
"!pip install langchain-community\n",
|
603 |
+
"!pip install gradio"
|
604 |
+
]
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"cell_type": "code",
|
608 |
+
"execution_count": 73,
|
609 |
+
"id": "35eed6ac",
|
610 |
+
"metadata": {},
|
611 |
+
"outputs": [
|
612 |
+
{
|
613 |
+
"name": "stdout",
|
614 |
+
"output_type": "stream",
|
615 |
+
"text": [
|
616 |
+
"Total number of document IDs: 9\n"
|
617 |
+
]
|
618 |
+
},
|
619 |
+
{
|
620 |
+
"name": "stderr",
|
621 |
+
"output_type": "stream",
|
622 |
+
"text": [
|
623 |
+
"/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
624 |
+
" \"https://urllib3.readthedocs.io/en/latest/advanced-usage.html\"\n",
|
625 |
+
"/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.nlp.dev.uptimize.merckgroup.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
626 |
+
" \"https://urllib3.readthedocs.io/en/latest/advanced-usage.html\"\n"
|
627 |
+
]
|
628 |
+
},
|
629 |
+
{
|
630 |
+
"name": "stdout",
|
631 |
+
"output_type": "stream",
|
632 |
+
"text": [
|
633 |
+
"{'results': [], 'matches': [{'id': 'gtn/input/knowledge_base/AUTOMATED FORECASTING PROCESS 4.docx-1', 'score': 0.879279137, 'values': []}, {'id': 'gtn/input/knowledge_base/automated adjustments process 3.docx-7', 'score': 0.827985406, 'values': []}], 'namespace': '', 'usage': {'readUnits': 5}}\n",
|
634 |
+
"Question: What should be the S3 bucket for final Forecast dump exported from SAC?\n",
|
635 |
+
"Documents: ['Step:8:- \\nExport the finalized forecast dump from SAP Analytics Cloud(SAC).', 'AUTOMATED ADJUSTMENTS PROCESS\\nStep:1:- \\nLoad the final Forecast dump exported from SAC into S3 bucket with path S3://emd-forecast/gtn/output/SAC_forecast_dump\\nStep:2:- \\nLoad business files into S3 bucket with path S3://emd-forecast/gtn/output/business_files\\nStep:3:- \\nLoad Mapping template for all the families into S3 bucket with path S3://emd-forecast/gtn/output\\nStep:4:- \\nRun Datacleaning.ipynb to convert the business files into readable and understandable format.\\nStep:5:- \\nRun Units_gross_']\n",
|
636 |
+
"The S3 bucket for the final Forecast dump exported from SAC should be S3://emd-forecast/gtn/output/SAC_forecast_dump.\n"
|
637 |
+
]
|
638 |
+
}
|
639 |
+
],
|
640 |
+
"source": [
|
641 |
+
"import os\n",
|
642 |
+
"from docx import Document\n",
|
643 |
+
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
644 |
+
"from openai import AzureOpenAI\n",
|
645 |
+
"import httpx\n",
|
646 |
+
"import requests\n",
|
647 |
+
"import json\n",
|
648 |
+
"import openai\n",
|
649 |
+
"import pinecone\n",
|
650 |
+
"from langchain.document_loaders import DirectoryLoader\n",
|
651 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
652 |
+
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
653 |
+
"from langchain.vectorstores import Pinecone\n",
|
654 |
+
"from langchain.llms import OpenAI\n",
|
655 |
+
"from langchain.chains.question_answering import load_qa_chain\n",
|
656 |
+
"import boto3\n",
|
657 |
+
"# Initialize a session using Amazon S3\n",
|
658 |
+
"s3 = boto3.client('s3')\n",
|
659 |
+
"import io\n",
|
660 |
+
"import pinecone\n",
|
661 |
+
"from pinecone import Pinecone, ServerlessSpec\n",
|
662 |
+
"import requests\n",
|
663 |
+
"from pinecone import Pinecone\n",
|
664 |
+
"httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
|
665 |
+
"\n",
|
666 |
+
"# Initialize AzureOpenAI client with the HTTP client\n",
|
667 |
+
"client = AzureOpenAI(\n",
|
668 |
+
" azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
|
669 |
+
" api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
|
670 |
+
" api_version=\"2023-09-01-preview\",\n",
|
671 |
+
" http_client=httpx_client\n",
|
672 |
+
")\n",
|
673 |
+
"\n",
|
674 |
+
"# Initialize Pinecone client\n",
|
675 |
+
"pc = Pinecone(\n",
|
676 |
+
" api_key=\"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
677 |
+
")\n",
|
678 |
+
"\n",
|
679 |
+
"# Connect to the index\n",
|
680 |
+
"index_name = \"ktbot\"\n",
|
681 |
+
"index = pc.Index(index_name)\n",
|
682 |
+
"\n",
|
683 |
+
"# Collect all document IDs with pagination\n",
|
684 |
+
"doc_ids = []\n",
|
685 |
+
"limit = 90 # Maximum number of vector IDs per request\n",
|
686 |
+
"next_token = None\n",
|
687 |
+
"\n",
|
688 |
+
"while True:\n",
|
689 |
+
" response = index.list_paginated(namespace=None, limit=limit, pagination_token=next_token)\n",
|
690 |
+
" \n",
|
691 |
+
" # Collect IDs from the response\n",
|
692 |
+
" doc_ids.extend([v.id for v in response.vectors])\n",
|
693 |
+
" \n",
|
694 |
+
" # Update the next_token for pagination\n",
|
695 |
+
" next_token = response.next_page_token\n",
|
696 |
+
" \n",
|
697 |
+
" # Break the loop if there is no next token\n",
|
698 |
+
" if not next_token:\n",
|
699 |
+
" break\n",
|
700 |
+
"\n",
|
701 |
+
"\n",
|
702 |
+
"print(f\"Total number of document IDs: {len(doc_ids)}\")\n",
|
703 |
+
"\n",
|
704 |
+
"# Base URL and path of the Pinecone endpoint used to fetch stored vectors (and their metadata) by ID\n",
|
705 |
+
"base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
|
706 |
+
"endpoint = \"/vectors/fetch\"\n",
|
707 |
+
"\n",
|
708 |
+
"# Prepare headers\n",
|
709 |
+
"api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
|
710 |
+
"headers = {\n",
|
711 |
+
" \"Content-Type\": \"application/json\",\n",
|
712 |
+
" \"Api-Key\": api_key\n",
|
713 |
+
"}\n",
|
714 |
+
"\n",
|
715 |
+
"# Dictionary to store the full text for each document chunk\n",
|
716 |
+
"doc_text_dict = {}\n",
|
717 |
+
"\n",
|
718 |
+
"# Function to fetch the stored metadata text for a given document ID\n",
|
719 |
+
"def fetch_metadata(doc_id):\n",
|
720 |
+
" url = f\"{base_url}{endpoint}?ids={doc_id}\"\n",
|
721 |
+
" try:\n",
|
722 |
+
" # Send the GET request to fetch metadata\n",
|
723 |
+
" response = requests.get(url, headers=headers)\n",
|
724 |
+
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
725 |
+
"\n",
|
726 |
+
" # Check HTTP response status\n",
|
727 |
+
" if response.status_code == 200:\n",
|
728 |
+
" # Attempt to parse JSON response\n",
|
729 |
+
" try:\n",
|
730 |
+
" data = response.json()\n",
|
731 |
+
" if \"vectors\" in data and doc_id in data[\"vectors\"]:\n",
|
732 |
+
" vector_data = data[\"vectors\"][doc_id]\n",
|
733 |
+
" return vector_data.get('metadata', {}).get('text', '') # Return the text from metadata\n",
|
734 |
+
" else:\n",
|
735 |
+
" print(f\"No metadata found for document ID: {doc_id}\")\n",
|
736 |
+
" return None\n",
|
737 |
+
" except ValueError:\n",
|
738 |
+
" print(\"Response is not valid JSON.\")\n",
|
739 |
+
" print(\"Response content:\", response.text)\n",
|
740 |
+
" return None\n",
|
741 |
+
" else:\n",
|
742 |
+
" print(f\"Failed to fetch metadata: {response.status_code} - {response.text}\")\n",
|
743 |
+
" return None\n",
|
744 |
+
"\n",
|
745 |
+
" except requests.exceptions.RequestException as e:\n",
|
746 |
+
" print(f\"Error fetching document: {e}\")\n",
|
747 |
+
" return None\n",
|
748 |
+
"\n",
|
749 |
+
"# Fetch metadata for all document IDs and populate the doc_text_dict\n",
|
750 |
+
"for doc_id in doc_ids:\n",
|
751 |
+
" text = fetch_metadata(doc_id)\n",
|
752 |
+
" if text:\n",
|
753 |
+
" doc_text_dict[doc_id] = text\n",
|
754 |
+
"\n",
|
755 |
+
"\n",
|
756 |
+
"\n",
|
757 |
+
"# Preprocess the id to extract a consistent format\n",
|
758 |
+
"# (IDs that start with \"page_content=\" are reduced to the segment following the first \"=\", with surrounding single quotes stripped)\n",
|
759 |
+
"def preprocess_id(id_str):\n",
|
760 |
+
" if id_str.startswith(\"page_content=\"):\n",
|
761 |
+
" return id_str.split(\"=\")[1].strip(\"'\")\n",
|
762 |
+
" else:\n",
|
763 |
+
" return id_str\n",
|
764 |
+
"\n",
|
765 |
+
"\n",
|
766 |
+
"\n",
|
767 |
+
"# Return the text of the k most similar documents for a query, preprocessing each match id before lookup\n",
|
768 |
+
"def get_similar_docs(query, k=2, score=False):\n",
|
769 |
+
" # Generate the embedding for the query\n",
|
770 |
+
" query_embedding_response = client.embeddings.create(input=query, model=\"text-embedding-ada-002-v2\")\n",
|
771 |
+
" query_embedding = query_embedding_response.data[0].embedding\n",
|
772 |
+
" \n",
|
773 |
+
" # Search the Pinecone index for similar documents\n",
|
774 |
+
" query_payload = {\n",
|
775 |
+
" \"top_k\": k,\n",
|
776 |
+
" \"include_values\": score,\n",
|
777 |
+
" \"vector\": query_embedding\n",
|
778 |
+
" }\n",
|
779 |
+
" query_endpoint = f\"{base_url}/query\"\n",
|
780 |
+
" query_response = requests.post(query_endpoint, headers=headers, json=query_payload, verify=False)\n",
|
781 |
+
" \n",
|
782 |
+
" # Extract and return the similar documents\n",
|
783 |
+
" if query_response.status_code == 200:\n",
|
784 |
+
" search_results = query_response.json()\n",
|
785 |
+
" print(search_results)\n",
|
786 |
+
" similar_docs = [doc_text_dict[preprocess_id(match['id'])] for match in search_results['matches']]\n",
|
787 |
+
" return similar_docs\n",
|
788 |
+
" else:\n",
|
789 |
+
" print(f\"Failed to retrieve similar documents: {query_response.status_code} - {query_response.reason}\")\n",
|
790 |
+
" print(\"Response content:\", query_response.text)\n",
|
791 |
+
" return []\n",
|
792 |
+
"\n",
|
793 |
+
"\n",
|
794 |
+
"def get_answer(query):\n",
|
795 |
+
" similar_docs = get_similar_docs(query)\n",
|
796 |
+
" combined_message = f\"Question: {query}\\nDocuments: {similar_docs}\"\n",
|
797 |
+
" print(combined_message)\n",
|
798 |
+
" openai_key = 'c6140592-6c65-4261-a959-2e2ba099526d'\n",
|
799 |
+
" openai_url = \"https://api.nlp.dev.uptimize.merckgroup.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-09-01-preview\"\n",
|
800 |
+
" headers = {\n",
|
801 |
+
" \"Content-Type\": \"application/json\",\n",
|
802 |
+
" \"x-api-key\": openai_key,\n",
|
803 |
+
" \"region\": \"EU\"\n",
|
804 |
+
" }\n",
|
805 |
+
" \n",
|
806 |
+
" payload = {\n",
|
807 |
+
" \"model\": \"gpt-4-turbo\",\n",
|
808 |
+
" \"messages\": [\n",
|
809 |
+
" {\"role\": \"user\", \"content\": combined_message}\n",
|
810 |
+
" ],\n",
|
811 |
+
" \"max_tokens\": 1024,\n",
|
812 |
+
" \"n\": 1,\n",
|
813 |
+
" \"temperature\": 0\n",
|
814 |
+
" }\n",
|
815 |
+
"\n",
|
816 |
+
" try:\n",
|
817 |
+
" response = requests.post(openai_url, headers=headers, data=json.dumps(payload), verify=False)\n",
|
818 |
+
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
819 |
+
" ChatGPT_reply = response.json()[\"choices\"][0][\"message\"][\"content\"]\n",
|
820 |
+
" return ChatGPT_reply\n",
|
821 |
+
" except requests.exceptions.RequestException as e:\n",
|
822 |
+
" print(\"ERROR\")\n",
|
823 |
+
" print(e)\n",
|
824 |
+
" raise Exception(f'Request failed: {e}')\n",
|
825 |
+
"\n",
|
826 |
+
"# Example usage:\n",
|
827 |
+
"query = \"What should be the S3 bucket for final Forecast dump exported from SAC?\"\n",
|
828 |
+
"answer = get_answer(query)\n",
|
829 |
+
"print(answer)\n"
|
830 |
+
]
|
831 |
+
},
|
832 |
+
{
|
833 |
+
"cell_type": "code",
|
834 |
+
"execution_count": 75,
|
835 |
+
"id": "27ed9037",
|
836 |
+
"metadata": {},
|
837 |
+
"outputs": [
|
838 |
+
{
|
839 |
+
"name": "stdout",
|
840 |
+
"output_type": "stream",
|
841 |
+
"text": [
|
842 |
+
"Running on local URL: http://127.0.0.1:7860\n",
|
843 |
+
"Running on public URL: https://0e7b0972f54b527958.gradio.live\n",
|
844 |
+
"\n",
|
845 |
+
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
|
846 |
+
]
|
847 |
+
},
|
848 |
+
{
|
849 |
+
"data": {
|
850 |
+
"text/html": [
|
851 |
+
"<div><iframe src=\"https://0e7b0972f54b527958.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
852 |
+
],
|
853 |
+
"text/plain": [
|
854 |
+
"<IPython.core.display.HTML object>"
|
855 |
+
]
|
856 |
+
},
|
857 |
+
"metadata": {},
|
858 |
+
"output_type": "display_data"
|
859 |
+
},
|
860 |
+
{
|
861 |
+
"name": "stdout",
|
862 |
+
"output_type": "stream",
|
863 |
+
"text": [
|
864 |
+
"Keyboard interruption in main thread... closing server.\n",
|
865 |
+
"Killing tunnel 127.0.0.1:7860 <> https://0e7b0972f54b527958.gradio.live\n"
|
866 |
+
]
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"ename": "TypeError",
|
870 |
+
"evalue": "EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument '_js'",
|
871 |
+
"output_type": "error",
|
872 |
+
"traceback": [
|
873 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
874 |
+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
875 |
+
"Cell \u001b[0;32mIn[75], line 49\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;66;03m# Apply custom CSS using JavaScript\u001b[39;00m\n\u001b[1;32m 48\u001b[0m block\u001b[38;5;241m.\u001b[39mlaunch(debug\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, inbrowser\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,share\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 49\u001b[0m \u001b[43mblock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_js\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;43m (function() \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;43m var style = document.createElement(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstyle\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;124;43m style.innerHTML = `\u001b[39;49m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;124;43m #message-box textarea \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;124;43m border: 1px solid black !important;\u001b[39;49m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;124;43m }\u001b[39;49m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;124;43m `;\u001b[39;49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;124;43m document.head.appendChild(style);\u001b[39;49m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;124;43m })();\u001b[39;49m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
876 |
+
"\u001b[0;31mTypeError\u001b[0m: EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument '_js'"
|
877 |
+
]
|
878 |
+
}
|
879 |
+
],
|
880 |
+
"source": [
|
881 |
+
"import base64\n",
|
882 |
+
"import gradio as gr\n",
|
883 |
+
"bucket_name='emd-forecast'\n",
|
884 |
+
"file_key = 'gtn/input/logo_ktbot.PNG'\n",
|
885 |
+
"\n",
|
886 |
+
"# Download the file from S3\n",
|
887 |
+
"response = s3.get_object(Bucket=bucket_name, Key=file_key)\n",
|
888 |
+
"file_content = response['Body'].read()\n",
|
889 |
+
"\n",
|
890 |
+
"# Encode the file content in base64\n",
|
891 |
+
"png_base64 = base64.b64encode(file_content).decode('utf-8')\n",
|
892 |
+
"png_data_url = f\"data:image/png;base64,{png_base64}\"\n",
|
893 |
+
"\n",
|
894 |
+
"\n",
|
895 |
+
"\n",
|
896 |
+
"\n",
|
897 |
+
"def message_and_history(input, history):\n",
|
898 |
+
" history = history or []\n",
|
899 |
+
" s = list(sum(history, ()))\n",
|
900 |
+
" s.append(input)\n",
|
901 |
+
" inp = ' '.join(s)\n",
|
902 |
+
" output = get_answer(inp)\n",
|
903 |
+
" history.append((input, output))\n",
|
904 |
+
" return history, history\n",
|
905 |
+
"\n",
|
906 |
+
"\n",
|
907 |
+
"block = gr.Blocks(theme=gr.themes.Soft())\n",
|
908 |
+
"\n",
|
909 |
+
"with block:\n",
|
910 |
+
" with gr.Row():\n",
|
911 |
+
" with gr.Column(scale=1, min_width=300):\n",
|
912 |
+
" gr.Markdown(f\"\"\"\n",
|
913 |
+
" <div style=\"display: flex; align-items: center; justify-content: center; height: calc(100% - 50px);\">\n",
|
914 |
+
" <img src=\"{png_data_url}\" alt=\"Logo\" style=\"height: auto; max-height: 100%;\">\n",
|
915 |
+
" </div>\n",
|
916 |
+
"\n",
|
917 |
+
" \"\"\")\n",
|
918 |
+
" with gr.Column(scale=2):\n",
|
919 |
+
" chatbot = gr.Chatbot(label=\"KT Bot\")\n",
|
920 |
+
" message = gr.Textbox(placeholder=\"Your KT bot, ask me anything...\",label=\"Query\")\n",
|
921 |
+
" state = gr.State()\n",
|
922 |
+
" submit = gr.Button(\"SEND\")\n",
|
923 |
+
" submit.click(message_and_history, \n",
|
924 |
+
" inputs=[message, state], \n",
|
925 |
+
" outputs=[chatbot, state])\n",
|
926 |
+
"\n",
|
927 |
+
"# Launch the app, then apply custom CSS using JavaScript\n",
|
928 |
+
"block.launch(debug=True, inbrowser=True,share=True)\n",
|
929 |
+
"block.load(None, None, None, _js=\"\"\"\n",
|
930 |
+
" (function() {\n",
|
931 |
+
" var style = document.createElement('style');\n",
|
932 |
+
" style.innerHTML = `\n",
|
933 |
+
" #message-box textarea {\n",
|
934 |
+
" border: 1px solid black !important;\n",
|
935 |
+
" }\n",
|
936 |
+
" `;\n",
|
937 |
+
" document.head.appendChild(style);\n",
|
938 |
+
" })();\n",
|
939 |
+
"\"\"\")\n"
|
940 |
+
]
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"cell_type": "code",
|
944 |
+
"execution_count": 8,
|
945 |
+
"id": "2ed00191",
|
946 |
+
"metadata": {},
|
947 |
+
"outputs": [
|
948 |
+
{
|
949 |
+
"name": "stdout",
|
950 |
+
"output_type": "stream",
|
951 |
+
"text": [
|
952 |
+
"Requirement already satisfied: urllib3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (1.26.18)\n",
|
953 |
+
"Collecting urllib3\n",
|
954 |
+
" Using cached urllib3-2.2.1-py3-none-any.whl.metadata (6.4 kB)\n",
|
955 |
+
"Requirement already satisfied: pyopenssl in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (24.0.0)\n",
|
956 |
+
"Collecting pyopenssl\n",
|
957 |
+
" Downloading pyOpenSSL-24.1.0-py3-none-any.whl.metadata (12 kB)\n",
|
958 |
+
"Requirement already satisfied: cryptography<43,>=41.0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pyopenssl) (42.0.5)\n",
|
959 |
+
"Requirement already satisfied: cffi>=1.12 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from cryptography<43,>=41.0.5->pyopenssl) (1.16.0)\n",
|
960 |
+
"Requirement already satisfied: pycparser in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from cffi>=1.12->cryptography<43,>=41.0.5->pyopenssl) (2.21)\n",
|
961 |
+
"Using cached urllib3-2.2.1-py3-none-any.whl (121 kB)\n",
|
962 |
+
"Downloading pyOpenSSL-24.1.0-py3-none-any.whl (56 kB)\n",
|
963 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
964 |
+
"\u001b[?25hInstalling collected packages: urllib3, pyopenssl\n",
|
965 |
+
" Attempting uninstall: urllib3\n",
|
966 |
+
" Found existing installation: urllib3 1.26.18\n",
|
967 |
+
" Uninstalling urllib3-1.26.18:\n",
|
968 |
+
" Successfully uninstalled urllib3-1.26.18\n",
|
969 |
+
" Attempting uninstall: pyopenssl\n",
|
970 |
+
" Found existing installation: pyOpenSSL 24.0.0\n",
|
971 |
+
" Uninstalling pyOpenSSL-24.0.0:\n",
|
972 |
+
" Successfully uninstalled pyOpenSSL-24.0.0\n",
|
973 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
974 |
+
"botocore 1.31.85 requires urllib3<2.1,>=1.25.4; python_version >= \"3.10\", but you have urllib3 2.2.1 which is incompatible.\n",
|
975 |
+
"requests 2.28.1 requires urllib3<1.27,>=1.21.1, but you have urllib3 2.2.1 which is incompatible.\n",
|
976 |
+
"sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
|
977 |
+
"sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
|
978 |
+
"sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
|
979 |
+
"\u001b[0mSuccessfully installed pyopenssl-24.1.0 urllib3-2.2.1\n"
|
980 |
+
]
|
981 |
+
}
|
982 |
+
],
|
983 |
+
"source": [
|
984 |
+
"!pip install --upgrade urllib3 pyopenssl\n"
|
985 |
+
]
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"cell_type": "code",
|
989 |
+
"execution_count": null,
|
990 |
+
"id": "b80e67ff",
|
991 |
+
"metadata": {},
|
992 |
+
"outputs": [],
|
993 |
+
"source": [
|
994 |
+
"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"
|
995 |
+
]
|
996 |
+
},
|
997 |
+
{
|
998 |
+
"cell_type": "code",
|
999 |
+
"execution_count": 79,
|
1000 |
+
"id": "0751314c",
|
1001 |
+
"metadata": {},
|
1002 |
+
"outputs": [
|
1003 |
+
{
|
1004 |
+
"name": "stdout",
|
1005 |
+
"output_type": "stream",
|
1006 |
+
"text": [
|
1007 |
+
"Need \u001b[32m'write'\u001b[0m access token to create a Spaces repo.\n",
|
1008 |
+
"\n",
|
1009 |
+
" _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
|
1010 |
+
" _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
|
1011 |
+
" _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
|
1012 |
+
" _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
|
1013 |
+
" _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
|
1014 |
+
"\n",
|
1015 |
+
" To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
|
1016 |
+
"Enter your token (input will not be visible): \n",
|
1017 |
+
"\u001b[31mAborted.\u001b[0m\n"
|
1018 |
+
]
|
1019 |
+
}
|
1020 |
+
],
|
1021 |
+
"source": [
|
1022 |
+
"!gradio deploy"
|
1023 |
+
]
|
1024 |
+
},
|
1025 |
+
{
|
1026 |
+
"cell_type": "code",
|
1027 |
+
"execution_count": null,
|
1028 |
+
"id": "870ba009",
|
1029 |
+
"metadata": {},
|
1030 |
+
"outputs": [
|
1031 |
+
{
|
1032 |
+
"name": "stdout",
|
1033 |
+
"output_type": "stream",
|
1034 |
+
"text": [
|
1035 |
+
"Need \u001b[32m'write'\u001b[0m access token to create a Spaces repo.\r\n",
|
1036 |
+
"\r\n",
|
1037 |
+
" _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\r\n",
|
1038 |
+
" _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\r\n",
|
1039 |
+
" _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\r\n",
|
1040 |
+
" _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\r\n",
|
1041 |
+
" _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\r\n",
|
1042 |
+
"\r\n",
|
1043 |
+
" A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\r\n",
|
1044 |
+
" Setting a new token will erase the existing one.\r\n",
|
1045 |
+
" To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\r\n",
|
1046 |
+
"Enter your token (input will not be visible): "
|
1047 |
+
]
|
1048 |
+
}
|
1049 |
+
],
|
1050 |
+
"source": [
|
1051 |
+
"!gradio deploy\n"
|
1052 |
+
]
|
1053 |
+
},
|
1054 |
+
{
|
1055 |
+
"cell_type": "code",
|
1056 |
+
"execution_count": null,
|
1057 |
+
"id": "ec155eeb",
|
1058 |
+
"metadata": {},
|
1059 |
+
"outputs": [],
|
1060 |
+
"source": []
|
1061 |
+
}
|
1062 |
+
],
|
1063 |
+
"metadata": {
|
1064 |
+
"kernelspec": {
|
1065 |
+
"display_name": "conda_python3",
|
1066 |
+
"language": "python",
|
1067 |
+
"name": "conda_python3"
|
1068 |
+
},
|
1069 |
+
"language_info": {
|
1070 |
+
"codemirror_mode": {
|
1071 |
+
"name": "ipython",
|
1072 |
+
"version": 3
|
1073 |
+
},
|
1074 |
+
"file_extension": ".py",
|
1075 |
+
"mimetype": "text/x-python",
|
1076 |
+
"name": "python",
|
1077 |
+
"nbconvert_exporter": "python",
|
1078 |
+
"pygments_lexer": "ipython3",
|
1079 |
+
"version": "3.10.14"
|
1080 |
+
}
|
1081 |
+
},
|
1082 |
+
"nbformat": 4,
|
1083 |
+
"nbformat_minor": 5
|
1084 |
+
}
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.36.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: knowledgebot
|
3 |
+
app_file: KTBOT.ipynb
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.36.1
|
|
|
|
|
6 |
---
|
|
|
|
cacert.pem
ADDED
The diff for this file is too large to render.
See raw diff
|
|