heenakhilwani committed on
Commit
b8a28f6
β€’
1 Parent(s): daca530

Upload folder using huggingface_hub

Browse files
.Trash-1000/files/Index_update.ipynb ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "6c4c55f1",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "#deletion and creation of index\n",
11
+ "\n",
12
+ "import requests\n",
13
+ "\n",
14
+ "\n",
15
+ "api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
16
+ "base_url = \"https://api.pinecone.io\"\n",
17
+ "\n",
18
+ "# Index name\n",
19
+ "index_name = \"ktbot\"\n",
20
+ "\n",
21
+ "# Construct the delete URL for the index\n",
22
+ "delete_url = f\"{base_url}/indexes/{index_name}\"\n",
23
+ "\n",
24
+ "# Headers with API key\n",
25
+ "headers = {\n",
26
+ " \"Content-Type\": \"application/json\",\n",
27
+ " \"Api-Key\": api_key\n",
28
+ "}\n",
29
+ "\n",
30
+ "# Send the DELETE request to delete the existing index\n",
31
+ "response_delete = requests.delete(delete_url, headers=headers)\n",
32
+ "print(response_delete.status_code)\n",
33
+ "# Check the response status\n",
34
+ "if response_delete.status_code in (204, 202, 404):\n",
35
+ " print(f\"Index '{index_name}' deleted successfully.\")\n",
36
+ "else:\n",
37
+ " print(f\"Failed to delete index '{index_name}': {response_delete.status_code} - {response_delete.text}\")\n",
38
+ "\n",
39
+ "\n",
40
+ "\n",
41
+ "\n",
42
+ "# Construct the URL for creating the index\n",
43
+ "create_index_url = f\"{base_url}/indexes\"\n",
44
+ "\n",
45
+ "\n",
46
+ "# Define the payload for creating the index\n",
47
+ "payload = {\n",
48
+ " \"name\": index_name,\n",
49
+ " \"dimension\": 1536,\n",
50
+ " \"metric\": \"cosine\",\n",
51
+ " \"spec\": {\n",
52
+ " \"serverless\": {\n",
53
+ " \"cloud\": \"aws\",\n",
54
+ " \"region\": \"us-east-1\"\n",
55
+ " }\n",
56
+ " }\n",
57
+ "}\n",
58
+ "\n",
59
+ "# Send the POST request to create the index\n",
60
+ "response = requests.post(create_index_url, headers=headers, json=payload)\n",
61
+ "\n",
62
+ "# Check the response status\n",
63
+ "if response.status_code == 201:\n",
64
+ " print(f\"Index '{index_name}' created successfully.\")\n",
65
+ " response_json = json.loads(response.text)\n",
66
+ " host = response_json[\"host\"]\n",
67
+ " # Print the host\n",
68
+ " print(\"Host for index 'ktbot':\", host)\n",
69
+ "\n",
70
+ "else:\n",
71
+ " print(f\"Failed to create index '{index_name}': {response.status_code} - {response.text}\")\n"
72
+ ]
73
+ }
74
+ ],
75
+ "metadata": {
76
+ "kernelspec": {
77
+ "display_name": "conda_python3",
78
+ "language": "python",
79
+ "name": "conda_python3"
80
+ },
81
+ "language_info": {
82
+ "codemirror_mode": {
83
+ "name": "ipython",
84
+ "version": 3
85
+ },
86
+ "file_extension": ".py",
87
+ "mimetype": "text/x-python",
88
+ "name": "python",
89
+ "nbconvert_exporter": "python",
90
+ "pygments_lexer": "ipython3",
91
+ "version": "3.10.14"
92
+ }
93
+ },
94
+ "nbformat": 4,
95
+ "nbformat_minor": 5
96
+ }
.Trash-1000/files/Pinecone_upsert.ipynb ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "1459dedf",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Number of documents loaded: 3\n",
14
+ "Number of split documents: 9\n",
15
+ "Data upserted successfully.\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/urllib3/connectionpool.py:1061: InsecureRequestWarning: Unverified HTTPS request is being made to host 'ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
23
+ " warnings.warn(\n"
24
+ ]
25
+ }
26
+ ],
27
+ "source": [
28
+ "import os\n",
29
+ "from docx import Document\n",
30
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
31
+ "from openai import AzureOpenAI\n",
32
+ "import httpx\n",
33
+ "import requests\n",
34
+ "import json\n",
35
+ "import openai\n",
36
+ "import pinecone\n",
37
+ "from langchain.document_loaders import DirectoryLoader\n",
38
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
39
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
40
+ "from langchain.vectorstores import Pinecone\n",
41
+ "from langchain.llms import OpenAI\n",
42
+ "from langchain.chains.question_answering import load_qa_chain\n",
43
+ "import boto3\n",
44
+ "# Initialize a session using Amazon S3\n",
45
+ "s3 = boto3.client('s3')\n",
46
+ "import io\n",
47
+ "\n",
48
+ "def load_docx(file_content):\n",
49
+ " # Read the DOCX file content and return the text\n",
50
+ " doc = Document(io.BytesIO(file_content))\n",
51
+ " full_text = []\n",
52
+ " for para in doc.paragraphs:\n",
53
+ " full_text.append(para.text)\n",
54
+ " return '\\n'.join(full_text)\n",
55
+ "\n",
56
+ "def load_docs_from_s3(bucket_name, prefix):\n",
57
+ " documents = []\n",
58
+ " try:\n",
59
+ " # List all objects in the S3 bucket with the specified prefix\n",
60
+ " response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)\n",
61
+ " for obj in response.get('Contents', []):\n",
62
+ " if obj['Key'].endswith('.docx'):\n",
63
+ " # Read the file content from S3\n",
64
+ " file_obj = s3.get_object(Bucket=bucket_name, Key=obj['Key'])\n",
65
+ " file_content = file_obj['Body'].read()\n",
66
+ " text = load_docx(file_content)\n",
67
+ " if text:\n",
68
+ " documents.append((obj['Key'], text)) # Store filename and text together\n",
69
+ " except Exception as e:\n",
70
+ " print(f\"Error accessing S3 bucket: {e}\")\n",
71
+ " return documents\n",
72
+ "\n",
73
+ "# Example usage\n",
74
+ "\n",
75
+ "\n",
76
+ "bucket_name = 'emd-forecast'\n",
77
+ "prefix = 'gtn/input/knowledge_base/' # Replace with your actual prefix\n",
78
+ "\n",
79
+ "\n",
80
+ " \n",
81
+ "# Split documents into chunks\n",
82
+ "def split_docs(documents, chunk_size=1000, chunk_overlap=20):\n",
83
+ " text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
84
+ " docs = []\n",
85
+ " for doc_id, doc in documents:\n",
86
+ " splits = text_splitter.split_text(doc)\n",
87
+ " docs.extend([(doc_id, split) for split in splits])\n",
88
+ " return docs\n",
89
+ "\n",
90
+ "# Load documents from a directory\n",
91
+ "documents = load_docs_from_s3(bucket_name, prefix)\n",
92
+ "print(f\"Number of documents loaded: {len([d[0] for d in documents])}\")\n",
93
+ "\n",
94
+ "# Split documents into chunks\n",
95
+ "docs = split_docs(documents)\n",
96
+ "print(f\"Number of split documents: {len(docs)}\")\n",
97
+ "\n",
98
+ "# Initialize HTTP client\n",
99
+ "httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
100
+ "\n",
101
+ "# Initialize AzureOpenAI client with the HTTP client\n",
102
+ "client = AzureOpenAI(\n",
103
+ " azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
104
+ " api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
105
+ " api_version=\"2023-09-01-preview\",\n",
106
+ " http_client=httpx_client\n",
107
+ ")\n",
108
+ "\n",
109
+ "# Generate embeddings for all document chunks\n",
110
+ "embeddings = []\n",
111
+ "\n",
112
+ "for doc_id, doc in docs:\n",
113
+ " text = doc\n",
114
+ " embedding_response = client.embeddings.create(input=text, model=\"text-embedding-ada-002-v2\")\n",
115
+ " if embedding_response is not None:\n",
116
+ " embedding = embedding_response.data[0].embedding\n",
117
+ " embeddings.append((embedding, doc_id, text))\n",
118
+ "\n",
119
+ "api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
120
+ "base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
121
+ "\n",
122
+ "delete_url = f\"{base_url}/vectors/delete\"\n",
123
+ "\n",
124
+ "upsert_endpoint = f\"{base_url}/vectors/upsert\"\n",
125
+ "\n",
126
+ "# Headers with API key\n",
127
+ "headers = {\n",
128
+ " \"Content-Type\": \"application/json\",\n",
129
+ " \"Api-Key\": api_key\n",
130
+ "}\n",
131
+ "\n",
132
+ "dimension = 1536\n",
133
+ "for embed, doc_id, text in embeddings:\n",
134
+ " assert len(embed) == dimension, \"Embedding dimension mismatch\"\n",
135
+ "\n",
136
+ "# Prepare the data to be added to the index\n",
137
+ "data = {\n",
138
+ " \"vectors\": [\n",
139
+ " {\n",
140
+ " \"id\": f\"{doc_id}-{i}\", # Unique id for each chunk\n",
141
+ " \"values\": embed, # Convert numpy array to list\n",
142
+ " \"metadata\": {\"text\": text[:500]} # Store the first 500 characters of the text for preview\n",
143
+ " }\n",
144
+ " for i, (embed, doc_id, text) in enumerate(embeddings)\n",
145
+ " ]\n",
146
+ "}\n",
147
+ "\n",
148
+ "# Make the POST request to upsert data\n",
149
+ "response = requests.post(upsert_endpoint, headers=headers, json=data, verify=False)\n",
150
+ "\n",
151
+ "# Check if the request was successful\n",
152
+ "if response.status_code == 200:\n",
153
+ " print(\"Data upserted successfully.\")\n",
154
+ "else:\n",
155
+ " print(f\"Failed to upsert data: {response.status_code} - {response.reason}\")\n",
156
+ " print(\"Response content:\", response.text)\n"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "id": "0d07c356",
163
+ "metadata": {},
164
+ "outputs": [],
165
+ "source": []
166
+ }
167
+ ],
168
+ "metadata": {
169
+ "kernelspec": {
170
+ "display_name": "conda_pytorch_p310",
171
+ "language": "python",
172
+ "name": "conda_pytorch_p310"
173
+ },
174
+ "language_info": {
175
+ "codemirror_mode": {
176
+ "name": "ipython",
177
+ "version": 3
178
+ },
179
+ "file_extension": ".py",
180
+ "mimetype": "text/x-python",
181
+ "name": "python",
182
+ "nbconvert_exporter": "python",
183
+ "pygments_lexer": "ipython3",
184
+ "version": "3.10.14"
185
+ }
186
+ },
187
+ "nbformat": 4,
188
+ "nbformat_minor": 5
189
+ }
.Trash-1000/files/Untitled 1.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
.Trash-1000/files/Untitled.ipynb ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "4b645a21",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "My public IP address is: 52.54.134.213\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from requests import get\n",
19
+ "ip = get('https://api.ipify.org').content.decode('utf8')\n",
20
+ "print('My public IP address is: {}'.format(ip))"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 4,
26
+ "id": "35eed6ac",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "Collecting docx\n",
34
+ " Downloading docx-0.2.4.tar.gz (54 kB)\n",
35
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
36
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
37
+ "\u001b[?25hCollecting lxml (from docx)\n",
38
+ " Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
39
+ "Requirement already satisfied: Pillow>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from docx) (10.2.0)\n",
40
+ "Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.0 MB)\n",
41
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
42
+ "\u001b[?25hBuilding wheels for collected packages: docx\n",
43
+ " Building wheel for docx (setup.py) ... \u001b[?25ldone\n",
44
+ "\u001b[?25h Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=f41f957c2ae37f6fa8c9bc84a51bf04272a0b7723ee075b4ad795c4f26527ee1\n",
45
+ " Stored in directory: /home/ec2-user/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767\n",
46
+ "Successfully built docx\n",
47
+ "Installing collected packages: lxml, docx\n",
48
+ "Successfully installed docx-0.2.4 lxml-5.2.2\n"
49
+ ]
50
+ }
51
+ ],
52
+ "source": [
53
+ "import os\n",
54
+ "from docx import Document\n",
55
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
56
+ "from openai import AzureOpenAI\n",
57
+ "import httpx\n",
58
+ "import requests\n",
59
+ "import json\n",
60
+ "import openai\n",
61
+ "import pinecone\n",
62
+ "from langchain.document_loaders import DirectoryLoader\n",
63
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
64
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
65
+ "from langchain.vectorstores import Pinecone\n",
66
+ "from langchain.llms import OpenAI\n",
67
+ "from langchain.chains.question_answering import load_qa_chain\n",
68
+ "# Initialize a session using Amazon S3\n",
69
+ "s3 = boto3.client('s3')\n",
70
+ "\n",
71
+ "def load_docx(file_content):\n",
72
+ " # Read the DOCX file content and return the text\n",
73
+ " doc = Document(io.BytesIO(file_content))\n",
74
+ " full_text = []\n",
75
+ " for para in doc.paragraphs:\n",
76
+ " full_text.append(para.text)\n",
77
+ " return '\\n'.join(full_text)\n",
78
+ "\n",
79
+ "def load_docs_from_s3(bucket_name, prefix):\n",
80
+ " documents = []\n",
81
+ " try:\n",
82
+ " # List all objects in the S3 bucket with the specified prefix\n",
83
+ " response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)\n",
84
+ " for obj in response.get('Contents', []):\n",
85
+ " if obj['Key'].endswith('.docx'):\n",
86
+ " # Read the file content from S3\n",
87
+ " file_obj = s3.get_object(Bucket=bucket_name, Key=obj['Key'])\n",
88
+ " file_content = file_obj['Body'].read()\n",
89
+ " text = load_docx(file_content)\n",
90
+ " if text:\n",
91
+ " documents.append((obj['Key'], text)) # Store filename and text together\n",
92
+ " except Exception as e:\n",
93
+ " print(f\"Error accessing S3 bucket: {e}\")\n",
94
+ " return documents\n",
95
+ "\n",
96
+ "# Example usage\n",
97
+ "\n",
98
+ "\n",
99
+ "bucket_name = 'emd-forecast'\n",
100
+ "prefix = 'gtn/input/knowledge_base/' # Replace with your actual prefix\n",
101
+ "\n",
102
+ "\n",
103
+ " \n",
104
+ "# Split documents into chunks\n",
105
+ "def split_docs(documents, chunk_size=1000, chunk_overlap=20):\n",
106
+ " text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
107
+ " docs = []\n",
108
+ " for doc_id, doc in documents:\n",
109
+ " splits = text_splitter.split_text(doc)\n",
110
+ " docs.extend([(doc_id, split) for split in splits])\n",
111
+ " return docs\n",
112
+ "\n",
113
+ "# Load documents from a directory\n",
114
+ "documents = load_docs_from_s3(bucket_name, prefix)\n",
115
+ "print(f\"Number of documents loaded: {len([d[0] for d in documents])}\")\n",
116
+ "\n",
117
+ "# Split documents into chunks\n",
118
+ "docs = split_docs(documents)\n",
119
+ "print(f\"Number of split documents: {len(docs)}\")\n",
120
+ "\n",
121
+ "# Initialize HTTP client\n",
122
+ "httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
123
+ "\n",
124
+ "# Initialize AzureOpenAI client with the HTTP client\n",
125
+ "client = AzureOpenAI(\n",
126
+ " azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
127
+ " api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
128
+ " api_version=\"2023-09-01-preview\",\n",
129
+ " http_client=httpx_client\n",
130
+ ")\n",
131
+ "\n",
132
+ "# Generate embeddings for all document chunks\n",
133
+ "embeddings = []\n",
134
+ "\n",
135
+ "for doc_id, doc in docs:\n",
136
+ " text = doc\n",
137
+ " embedding_response = client.embeddings.create(input=text, model=\"text-embedding-ada-002\")\n",
138
+ " if embedding_response is not None:\n",
139
+ " embedding = embedding_response.data[0].embedding\n",
140
+ " embeddings.append((embedding, doc_id, text))\n",
141
+ "\n",
142
+ "api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
143
+ "base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
144
+ "\n",
145
+ "delete_url = f\"{base_url}/vectors/delete\"\n",
146
+ "\n",
147
+ "upsert_endpoint = f\"{base_url}/vectors/upsert\"\n",
148
+ "\n",
149
+ "# Headers with API key\n",
150
+ "headers = {\n",
151
+ " \"Content-Type\": \"application/json\",\n",
152
+ " \"Api-Key\": api_key\n",
153
+ "}\n",
154
+ "\n",
155
+ "dimension = 1536\n",
156
+ "for embed, doc_id, text in embeddings:\n",
157
+ " assert len(embed) == dimension, \"Embedding dimension mismatch\"\n",
158
+ "\n",
159
+ "# Prepare the data to be added to the index\n",
160
+ "data = {\n",
161
+ " \"vectors\": [\n",
162
+ " {\n",
163
+ " \"id\": f\"{doc_id}-{i}\", # Unique id for each chunk\n",
164
+ " \"values\": embed, # Convert numpy array to list\n",
165
+ " \"metadata\": {\"text\": text[:500]} # Store the first 500 characters of the text for preview\n",
166
+ " }\n",
167
+ " for i, (embed, doc_id, text) in enumerate(embeddings)\n",
168
+ " ]\n",
169
+ "}\n",
170
+ "\n",
171
+ "# Make the POST request to upsert data\n",
172
+ "response = requests.post(upsert_endpoint, headers=headers, json=data, verify=False)\n",
173
+ "\n",
174
+ "# Check if the request was successful\n",
175
+ "if response.status_code == 200:\n",
176
+ " print(\"Data upserted successfully.\")\n",
177
+ "else:\n",
178
+ " print(f\"Failed to upsert data: {response.status_code} - {response.reason}\")\n",
179
+ " print(\"Response content:\", response.text)\n",
180
+ "\n",
181
+ "# Dictionary to store the full text for each document chunk\n",
182
+ "doc_text_dict = {f\"{doc_id}-{i}\": text for i, (embed, doc_id, text) in enumerate(embeddings)}\n",
183
+ "# Preprocess the id to extract a consistent format\n",
184
+ "# Preprocess the id to extract a consistent format\n",
185
+ "def preprocess_id(id_str):\n",
186
+ " if id_str.startswith(\"page_content=\"):\n",
187
+ " return id_str.split(\"=\")[1].strip(\"'\")\n",
188
+ " else:\n",
189
+ " return id_str\n",
190
+ "\n",
191
+ "\n",
192
+ "\n",
193
+ "# Update the get_similar_docs function to preprocess the id before retrieval\n",
194
+ "def get_similar_docs(query, k=2, score=False):\n",
195
+ " # Generate the embedding for the query\n",
196
+ " query_embedding_response = client.embeddings.create(input=query, model=\"text-embedding-ada-002\")\n",
197
+ " query_embedding = query_embedding_response.data[0].embedding\n",
198
+ " \n",
199
+ " # Search the Pinecone index for similar documents\n",
200
+ " query_payload = {\n",
201
+ " \"top_k\": k,\n",
202
+ " \"include_values\": score,\n",
203
+ " \"vector\": query_embedding\n",
204
+ " }\n",
205
+ " query_endpoint = f\"{base_url}/query\"\n",
206
+ " query_response = requests.post(query_endpoint, headers=headers, json=query_payload, verify=False)\n",
207
+ " \n",
208
+ " # Extract and return the similar documents\n",
209
+ " if query_response.status_code == 200:\n",
210
+ " search_results = query_response.json()\n",
211
+ " print(search_results)\n",
212
+ " similar_docs = [doc_text_dict[preprocess_id(match['id'])] for match in search_results['matches']]\n",
213
+ " return similar_docs\n",
214
+ " else:\n",
215
+ " print(f\"Failed to retrieve similar documents: {query_response.status_code} - {query_response.reason}\")\n",
216
+ " print(\"Response content:\", query_response.text)\n",
217
+ " return []\n",
218
+ "\n",
219
+ "\n",
220
+ "def get_answer(query):\n",
221
+ " similar_docs = get_similar_docs(query)\n",
222
+ " combined_message = f\"Question: {query}\\nDocuments: {similar_docs}\"\n",
223
+ " openai_key = 'c6140592-6c65-4261-a959-2e2ba099526d'\n",
224
+ " openai_url = \"https://api.nlp.dev.uptimize.merckgroup.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-09-01-preview\"\n",
225
+ " headers = {\n",
226
+ " \"Content-Type\": \"application/json\",\n",
227
+ " \"x-api-key\": openai_key,\n",
228
+ " \"region\": \"EU\"\n",
229
+ " }\n",
230
+ " \n",
231
+ " payload = {\n",
232
+ " \"model\": \"gpt-4-turbo\",\n",
233
+ " \"messages\": [\n",
234
+ " {\"role\": \"user\", \"content\": combined_message}\n",
235
+ " ],\n",
236
+ " \"max_tokens\": 1024,\n",
237
+ " \"n\": 1,\n",
238
+ " \"temperature\": 0\n",
239
+ " }\n",
240
+ "\n",
241
+ " try:\n",
242
+ " response = requests.post(openai_url, headers=headers, data=json.dumps(payload), verify=False)\n",
243
+ " response.raise_for_status() # Raise an exception for HTTP errors\n",
244
+ " ChatGPT_reply = response.json()[\"choices\"][0][\"message\"][\"content\"]\n",
245
+ " return ChatGPT_reply\n",
246
+ " except requests.exceptions.RequestException as e:\n",
247
+ " print(\"ERROR\")\n",
248
+ " print(e)\n",
249
+ " raise Exception(f'Request failed: {e}')\n",
250
+ "\n",
251
+ "# Example usage:\n",
252
+ "query = \"What should be the S3 bucket for final Forecast dump exported from SAC?\"\n",
253
+ "answer = get_answer(query)\n",
254
+ "print(answer)\n"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": null,
260
+ "id": "a1fc8e0d",
261
+ "metadata": {},
262
+ "outputs": [],
263
+ "source": [
264
+ "file_key = 'gtn/input/logo_ktbot.png'\n",
265
+ "\n",
266
+ "# Download the file from S3\n",
267
+ "response = s3.get_object(Bucket=bucket_name, Key=file_key)\n",
268
+ "file_content = response['Body'].read()\n",
269
+ "\n",
270
+ "# Encode the file content in base64\n",
271
+ "png_base64 = base64.b64encode(file_content).decode('utf-8')\n",
272
+ "png_data_url = f\"data:image/png;base64,{png_base64}\"\n",
273
+ "\n",
274
+ "# Print the data URL\n",
275
+ "print(png_data_url)\n",
276
+ "\n",
277
+ "\n",
278
+ "def message_and_history(input, history):\n",
279
+ " history = history or []\n",
280
+ " s = list(sum(history, ()))\n",
281
+ " s.append(input)\n",
282
+ " inp = ' '.join(s)\n",
283
+ " output = get_answer(inp)\n",
284
+ " history.append((input, output))\n",
285
+ " return history, history\n",
286
+ "\n",
287
+ "\n",
288
+ "block = gr.Blocks(theme=gr.themes.Soft())\n",
289
+ "\n",
290
+ "with block:\n",
291
+ " with gr.Row():\n",
292
+ " with gr.Column(scale=1, min_width=300):\n",
293
+ " gr.Markdown(f\"\"\"\n",
294
+ " <div style=\"display: flex; align-items: center; justify-content: center; height: calc(100% - 50px);\">\n",
295
+ " <img src=\"{png_data_url}\" alt=\"Logo\" style=\"height: auto; max-height: 100%;\">\n",
296
+ " </div>\n",
297
+ "\n",
298
+ " \"\"\")\n",
299
+ " with gr.Column(scale=2):\n",
300
+ " chatbot = gr.Chatbot(label=\"KT Bot\")\n",
301
+ " message = gr.Textbox(placeholder=\"Your KT bot, ask me anything...\",label=\"Query\")\n",
302
+ " state = gr.State()\n",
303
+ " submit = gr.Button(\"SEND\")\n",
304
+ " submit.click(message_and_history, \n",
305
+ " inputs=[message, state], \n",
306
+ " outputs=[chatbot, state])\n",
307
+ "\n",
308
+ "# Apply custom CSS using JavaScript\n",
309
+ "block.launch(debug=True, inbrowser=True,share=True)\n",
310
+ "block.load(None, None, None, _js=\"\"\"\n",
311
+ " (function() {\n",
312
+ " var style = document.createElement('style');\n",
313
+ " style.innerHTML = `\n",
314
+ " #message-box textarea {\n",
315
+ " border: 1px solid black !important;\n",
316
+ " }\n",
317
+ " `;\n",
318
+ " document.head.appendChild(style);\n",
319
+ " })();\n",
320
+ "\"\"\")\n"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": null,
326
+ "id": "cebfb55e",
327
+ "metadata": {},
328
+ "outputs": [],
329
+ "source": []
330
+ },
331
+ {
332
+ "cell_type": "code",
333
+ "execution_count": null,
334
+ "id": "f8256f50",
335
+ "metadata": {},
336
+ "outputs": [],
337
+ "source": []
338
+ }
339
+ ],
340
+ "metadata": {
341
+ "kernelspec": {
342
+ "display_name": "conda_python3",
343
+ "language": "python",
344
+ "name": "conda_python3"
345
+ },
346
+ "language_info": {
347
+ "codemirror_mode": {
348
+ "name": "ipython",
349
+ "version": 3
350
+ },
351
+ "file_extension": ".py",
352
+ "mimetype": "text/x-python",
353
+ "name": "python",
354
+ "nbconvert_exporter": "python",
355
+ "pygments_lexer": "ipython3",
356
+ "version": "3.10.14"
357
+ }
358
+ },
359
+ "nbformat": 4,
360
+ "nbformat_minor": 5
361
+ }
.Trash-1000/files/Untitled1.ipynb ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "90983f68",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "#deletion and creation of index\n",
11
+ "\n",
12
+ "import requests\n",
13
+ "\n",
14
+ "\n",
15
+ "api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
16
+ "base_url = \"https://api.pinecone.io\"\n",
17
+ "\n",
18
+ "# Index name\n",
19
+ "index_name = \"ktbot\"\n",
20
+ "\n",
21
+ "# Construct the delete URL for the index\n",
22
+ "delete_url = f\"{base_url}/indexes/{index_name}\"\n",
23
+ "\n",
24
+ "# Headers with API key\n",
25
+ "headers = {\n",
26
+ " \"Content-Type\": \"application/json\",\n",
27
+ " \"Api-Key\": api_key\n",
28
+ "}\n",
29
+ "\n",
30
+ "# Send the DELETE request to delete the existing index\n",
31
+ "response_delete = requests.delete(delete_url, headers=headers)\n",
32
+ "print(response_delete.status_code)\n",
33
+ "# Check the response status\n",
34
+ "if response_delete.status_code in (204, 202, 404):\n",
35
+ " print(f\"Index '{index_name}' deleted successfully.\")\n",
36
+ "else:\n",
37
+ " print(f\"Failed to delete index '{index_name}': {response_delete.status_code} - {response_delete.text}\")\n",
38
+ "\n",
39
+ "\n",
40
+ "\n",
41
+ "\n",
42
+ "# Construct the URL for creating the index\n",
43
+ "create_index_url = f\"{base_url}/indexes\"\n",
44
+ "\n",
45
+ "\n",
46
+ "# Define the payload for creating the index\n",
47
+ "payload = {\n",
48
+ " \"name\": index_name,\n",
49
+ " \"dimension\": 1536,\n",
50
+ " \"metric\": \"cosine\",\n",
51
+ " \"spec\": {\n",
52
+ " \"serverless\": {\n",
53
+ " \"cloud\": \"aws\",\n",
54
+ " \"region\": \"us-east-1\"\n",
55
+ " }\n",
56
+ " }\n",
57
+ "}\n",
58
+ "\n",
59
+ "# Send the POST request to create the index\n",
60
+ "response = requests.post(create_index_url, headers=headers, json=payload)\n",
61
+ "\n",
62
+ "# Check the response status\n",
63
+ "if response.status_code == 201:\n",
64
+ " print(f\"Index '{index_name}' created successfully.\")\n",
65
+ " response_json = json.loads(response.text)\n",
66
+ " host = response_json[\"host\"]\n",
67
+ " # Print the host\n",
68
+ " print(\"Host for index 'ktbot':\", host)\n",
69
+ "\n",
70
+ "else:\n",
71
+ " print(f\"Failed to create index '{index_name}': {response.status_code} - {response.text}\")\n"
72
+ ]
73
+ }
74
+ ],
75
+ "metadata": {
76
+ "kernelspec": {
77
+ "display_name": "conda_python3",
78
+ "language": "python",
79
+ "name": "conda_python3"
80
+ },
81
+ "language_info": {
82
+ "codemirror_mode": {
83
+ "name": "ipython",
84
+ "version": 3
85
+ },
86
+ "file_extension": ".py",
87
+ "mimetype": "text/x-python",
88
+ "name": "python",
89
+ "nbconvert_exporter": "python",
90
+ "pygments_lexer": "ipython3",
91
+ "version": "3.10.14"
92
+ }
93
+ },
94
+ "nbformat": 4,
95
+ "nbformat_minor": 5
96
+ }
.Trash-1000/info/Index_update.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=Index_update.ipynb
3
+ DeletionDate=2024-06-07T09:05:49
.Trash-1000/info/Pinecone_upsert.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=Pinecone_upsert.ipynb
3
+ DeletionDate=2024-06-18T08:33:21
.Trash-1000/info/Untitled 1.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=Untitled.ipynb
3
+ DeletionDate=2024-06-18T08:33:21
.Trash-1000/info/Untitled.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=Untitled.ipynb
3
+ DeletionDate=2024-06-07T07:59:19
.Trash-1000/info/Untitled1.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=Untitled1.ipynb
3
+ DeletionDate=2024-06-07T07:59:19
.ipynb_checkpoints/KTBOT-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
KTBOT.ipynb ADDED
@@ -0,0 +1,1084 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "3cb079ba",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Collecting boto3==1.28.44\n",
14
+ " Downloading boto3-1.28.44-py3-none-any.whl.metadata (6.7 kB)\n",
15
+ "Collecting botocore<1.32.0,>=1.31.44 (from boto3==1.28.44)\n",
16
+ " Downloading botocore-1.31.85-py3-none-any.whl.metadata (6.1 kB)\n",
17
+ "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from boto3==1.28.44) (1.0.1)\n",
18
+ "Collecting s3transfer<0.7.0,>=0.6.0 (from boto3==1.28.44)\n",
19
+ " Downloading s3transfer-0.6.2-py3-none-any.whl.metadata (1.8 kB)\n",
20
+ "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from botocore<1.32.0,>=1.31.44->boto3==1.28.44) (2.9.0)\n",
21
+ "Collecting urllib3<2.1,>=1.25.4 (from botocore<1.32.0,>=1.31.44->boto3==1.28.44)\n",
22
+ " Downloading urllib3-2.0.7-py3-none-any.whl.metadata (6.6 kB)\n",
23
+ "Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.32.0,>=1.31.44->boto3==1.28.44) (1.16.0)\n",
24
+ "Downloading boto3-1.28.44-py3-none-any.whl (135 kB)\n",
25
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
26
+ "\u001b[?25hDownloading botocore-1.31.85-py3-none-any.whl (11.3 MB)\n",
27
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m92.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
28
+ "\u001b[?25hDownloading s3transfer-0.6.2-py3-none-any.whl (79 kB)\n",
29
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
30
+ "\u001b[?25hDownloading urllib3-2.0.7-py3-none-any.whl (124 kB)\n",
31
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
32
+ "\u001b[?25hInstalling collected packages: urllib3, botocore, s3transfer, boto3\n",
33
+ " Attempting uninstall: urllib3\n",
34
+ " Found existing installation: urllib3 2.2.1\n",
35
+ " Uninstalling urllib3-2.2.1:\n",
36
+ " Successfully uninstalled urllib3-2.2.1\n",
37
+ " Attempting uninstall: botocore\n",
38
+ " Found existing installation: botocore 1.34.101\n",
39
+ " Uninstalling botocore-1.34.101:\n",
40
+ " Successfully uninstalled botocore-1.34.101\n",
41
+ " Attempting uninstall: s3transfer\n",
42
+ " Found existing installation: s3transfer 0.10.1\n",
43
+ " Uninstalling s3transfer-0.10.1:\n",
44
+ " Successfully uninstalled s3transfer-0.10.1\n",
45
+ " Attempting uninstall: boto3\n",
46
+ " Found existing installation: boto3 1.34.101\n",
47
+ " Uninstalling boto3-1.34.101:\n",
48
+ " Successfully uninstalled boto3-1.34.101\n",
49
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
50
+ "awscli 1.32.101 requires botocore==1.34.101, but you have botocore 1.31.85 which is incompatible.\n",
51
+ "awscli 1.32.101 requires s3transfer<0.11.0,>=0.10.0, but you have s3transfer 0.6.2 which is incompatible.\n",
52
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
53
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\n",
54
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
55
+ "\u001b[0mSuccessfully installed boto3-1.28.44 botocore-1.31.85 s3transfer-0.6.2 urllib3-2.0.7\n",
56
+ "Collecting docx==0.2.4\n",
57
+ " Downloading docx-0.2.4.tar.gz (54 kB)\n",
58
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
60
+ "\u001b[?25hCollecting lxml (from docx==0.2.4)\n",
61
+ " Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
62
+ "Requirement already satisfied: Pillow>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from docx==0.2.4) (10.2.0)\n",
63
+ "Downloading lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.0 MB)\n",
64
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m55.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
65
+ "\u001b[?25hBuilding wheels for collected packages: docx\n",
66
+ " Building wheel for docx (setup.py) ... \u001b[?25ldone\n",
67
+ "\u001b[?25h Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=95ee7562fbf76e3f8f45492dfc3ff7802626fe7573e681d40e6bb9e1ee6a85fd\n",
68
+ " Stored in directory: /home/ec2-user/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767\n",
69
+ "Successfully built docx\n",
70
+ "Installing collected packages: lxml, docx\n",
71
+ "Successfully installed docx-0.2.4 lxml-5.2.2\n",
72
+ "Collecting httpx==0.26.0 (from httpx[http2]==0.26.0)\n",
73
+ " Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\n",
74
+ "Requirement already satisfied: anyio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (4.3.0)\n",
75
+ "Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (2024.2.2)\n",
76
+ "Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (1.0.4)\n",
77
+ "Requirement already satisfied: idna in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (3.6)\n",
78
+ "Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx==0.26.0->httpx[http2]==0.26.0) (1.3.1)\n",
79
+ "Requirement already satisfied: h2<5,>=3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx[http2]==0.26.0) (4.1.0)\n",
80
+ "Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx==0.26.0->httpx[http2]==0.26.0) (0.14.0)\n",
81
+ "Requirement already satisfied: hyperframe<7,>=6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]==0.26.0) (6.0.1)\n",
82
+ "Requirement already satisfied: hpack<5,>=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from h2<5,>=3->httpx[http2]==0.26.0) (4.0.0)\n",
83
+ "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx==0.26.0->httpx[http2]==0.26.0) (1.2.0)\n",
84
+ "Requirement already satisfied: typing-extensions>=4.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx==0.26.0->httpx[http2]==0.26.0) (4.10.0)\n",
85
+ "Downloading httpx-0.26.0-py3-none-any.whl (75 kB)\n",
86
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
87
+ "\u001b[?25hInstalling collected packages: httpx\n",
88
+ " Attempting uninstall: httpx\n",
89
+ " Found existing installation: httpx 0.27.0\n",
90
+ " Uninstalling httpx-0.27.0:\n",
91
+ " Successfully uninstalled httpx-0.27.0\n",
92
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
93
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\u001b[0m\u001b[31m\n",
94
+ "\u001b[0mSuccessfully installed httpx-0.26.0\n",
95
+ "Collecting langchain==0.2.0\n",
96
+ " Downloading langchain-0.2.0-py3-none-any.whl.metadata (13 kB)\n",
97
+ "Requirement already satisfied: PyYAML>=5.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (6.0.1)\n",
98
+ "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (2.0.29)\n",
99
+ "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (3.9.3)\n",
100
+ "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (4.0.3)\n",
101
+ "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain==0.2.0)\n",
102
+ " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n",
103
+ "Collecting langchain-core<0.3.0,>=0.2.0 (from langchain==0.2.0)\n",
104
+ " Downloading langchain_core-0.2.8-py3-none-any.whl.metadata (5.8 kB)\n",
105
+ "Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain==0.2.0)\n",
106
+ " Downloading langchain_text_splitters-0.2.1-py3-none-any.whl.metadata (2.2 kB)\n"
107
+ ]
108
+ },
109
+ {
110
+ "name": "stdout",
111
+ "output_type": "stream",
112
+ "text": [
113
+ "Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.2.0)\n",
114
+ " Downloading langsmith-0.1.79-py3-none-any.whl.metadata (13 kB)\n",
115
+ "Requirement already satisfied: numpy<2,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (1.22.4)\n",
116
+ "Collecting pydantic<3,>=1 (from langchain==0.2.0)\n",
117
+ " Downloading pydantic-2.7.4-py3-none-any.whl.metadata (109 kB)\n",
118
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m109.4/109.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
119
+ "\u001b[?25hRequirement already satisfied: requests<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (2.31.0)\n",
120
+ "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain==0.2.0) (8.2.3)\n",
121
+ "Requirement already satisfied: aiosignal>=1.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.3.1)\n",
122
+ "Requirement already satisfied: attrs>=17.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (23.2.0)\n",
123
+ "Requirement already satisfied: frozenlist>=1.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.4.1)\n",
124
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (6.0.5)\n",
125
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.2.0) (1.9.4)\n",
126
+ "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain==0.2.0)\n",
127
+ " Downloading marshmallow-3.21.3-py3-none-any.whl.metadata (7.1 kB)\n",
128
+ "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain==0.2.0)\n",
129
+ " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n",
130
+ "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain==0.2.0)\n",
131
+ " Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\n",
132
+ "Collecting packaging<25,>=23.2 (from langchain-core<0.3.0,>=0.2.0->langchain==0.2.0)\n",
133
+ " Downloading packaging-24.1-py3-none-any.whl.metadata (3.2 kB)\n",
134
+ "Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain==0.2.0)\n",
135
+ " Downloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n",
136
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.7/49.7 kB\u001b[0m \u001b[31m541.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
137
+ "\u001b[?25hCollecting annotated-types>=0.4.0 (from pydantic<3,>=1->langchain==0.2.0)\n",
138
+ " Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
139
+ "Collecting pydantic-core==2.18.4 (from pydantic<3,>=1->langchain==0.2.0)\n",
140
+ " Downloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n",
141
+ "Requirement already satisfied: typing-extensions>=4.6.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain==0.2.0) (4.10.0)\n",
142
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (3.3.2)\n",
143
+ "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (3.6)\n",
144
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (2.0.7)\n",
145
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain==0.2.0) (2024.2.2)\n",
146
+ "Requirement already satisfied: greenlet!=0.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain==0.2.0) (3.0.3)\n",
147
+ "Requirement already satisfied: jsonpointer>=1.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain==0.2.0) (2.4)\n",
148
+ "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain==0.2.0) (1.0.0)\n",
149
+ "Downloading langchain-0.2.0-py3-none-any.whl (973 kB)\n",
150
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m973.7/973.7 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
151
+ "\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
152
+ "Downloading langchain_core-0.2.8-py3-none-any.whl (315 kB)\n",
153
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m315.8/315.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
154
+ "\u001b[?25hDownloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)\n",
155
+ "Downloading langsmith-0.1.79-py3-none-any.whl (125 kB)\n",
156
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.3/125.3 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
157
+ "\u001b[?25hDownloading pydantic-2.7.4-py3-none-any.whl (409 kB)\n",
158
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m409.0/409.0 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
159
+ "\u001b[?25hDownloading pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
160
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
161
+ "\u001b[?25hDownloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
162
+ "Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
163
+ "Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)\n",
164
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
165
+ "\u001b[?25hDownloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n",
166
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m145.0/145.0 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
167
+ "\u001b[?25hDownloading packaging-24.1-py3-none-any.whl (53 kB)\n",
168
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
169
+ "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
170
+ "Installing collected packages: typing-inspect, pydantic-core, packaging, orjson, jsonpatch, annotated-types, pydantic, marshmallow, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain\n",
171
+ " Attempting uninstall: packaging\n",
172
+ " Found existing installation: packaging 21.3\n",
173
+ " Uninstalling packaging-21.3:\n",
174
+ " Successfully uninstalled packaging-21.3\n",
175
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
176
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
177
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.1 which is incompatible.\n",
178
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
179
+ "\u001b[0mSuccessfully installed annotated-types-0.7.0 dataclasses-json-0.6.7 jsonpatch-1.33 langchain-0.2.0 langchain-core-0.2.8 langchain-text-splitters-0.2.1 langsmith-0.1.79 marshmallow-3.21.3 orjson-3.10.5 packaging-24.1 pydantic-2.7.4 pydantic-core-2.18.4 typing-inspect-0.9.0\n",
180
+ "Collecting openai==1.30.1\n",
181
+ " Downloading openai-1.30.1-py3-none-any.whl.metadata (21 kB)\n",
182
+ "Requirement already satisfied: anyio<5,>=3.5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.3.0)\n",
183
+ "Collecting distro<2,>=1.7.0 (from openai==1.30.1)\n",
184
+ " Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
185
+ "Requirement already satisfied: httpx<1,>=0.23.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (0.26.0)\n",
186
+ "Requirement already satisfied: pydantic<3,>=1.9.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (2.7.4)\n",
187
+ "Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (1.3.1)\n",
188
+ "Requirement already satisfied: tqdm>4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.66.2)\n",
189
+ "Requirement already satisfied: typing-extensions<5,>=4.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from openai==1.30.1) (4.10.0)\n"
190
+ ]
191
+ },
192
+ {
193
+ "name": "stdout",
194
+ "output_type": "stream",
195
+ "text": [
196
+ "Requirement already satisfied: idna>=2.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai==1.30.1) (3.6)\n",
197
+ "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai==1.30.1) (1.2.0)\n",
198
+ "Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai==1.30.1) (2024.2.2)\n",
199
+ "Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai==1.30.1) (1.0.4)\n",
200
+ "Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai==1.30.1) (0.14.0)\n",
201
+ "Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai==1.30.1) (0.7.0)\n",
202
+ "Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->openai==1.30.1) (2.18.4)\n",
203
+ "Downloading openai-1.30.1-py3-none-any.whl (320 kB)\n",
204
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
205
+ "\u001b[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
206
+ "Installing collected packages: distro, openai\n",
207
+ "Successfully installed distro-1.9.0 openai-1.30.1\n",
208
+ "Collecting pandas==2.2.2\n",
209
+ " Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n",
210
+ "Requirement already satisfied: numpy>=1.22.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (1.22.4)\n",
211
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2.9.0)\n",
212
+ "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2024.1)\n",
213
+ "Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas==2.2.2) (2024.1)\n",
214
+ "Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas==2.2.2) (1.16.0)\n",
215
+ "Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n",
216
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
217
+ "\u001b[?25hInstalling collected packages: pandas\n",
218
+ " Attempting uninstall: pandas\n",
219
+ " Found existing installation: pandas 2.2.1\n",
220
+ " Uninstalling pandas-2.2.1:\n",
221
+ " Successfully uninstalled pandas-2.2.1\n",
222
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
223
+ "autovizwidget 0.21.0 requires pandas<2.0.0,>=0.20.1, but you have pandas 2.2.2 which is incompatible.\n",
224
+ "hdijupyterutils 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
225
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
226
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\u001b[0m\u001b[31m\n",
227
+ "\u001b[0mSuccessfully installed pandas-2.2.2\n",
228
+ "Collecting pinecone-client==4.1.0\n",
229
+ " Downloading pinecone_client-4.1.0-py3-none-any.whl.metadata (16 kB)\n",
230
+ "Requirement already satisfied: certifi>=2019.11.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (2024.2.2)\n",
231
+ "Requirement already satisfied: tqdm>=4.64.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (4.66.2)\n",
232
+ "Requirement already satisfied: typing-extensions>=3.7.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (4.10.0)\n",
233
+ "Requirement already satisfied: urllib3>=1.26.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pinecone-client==4.1.0) (2.0.7)\n",
234
+ "Downloading pinecone_client-4.1.0-py3-none-any.whl (215 kB)\n",
235
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.5/215.5 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
236
+ "\u001b[?25hInstalling collected packages: pinecone-client\n",
237
+ "Successfully installed pinecone-client-4.1.0\n",
238
+ "Collecting streamlit==1.32.2\n",
239
+ " Downloading streamlit-1.32.2-py2.py3-none-any.whl.metadata (8.5 kB)\n",
240
+ "Collecting altair<6,>=4.0 (from streamlit==1.32.2)\n",
241
+ " Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)\n",
242
+ "Requirement already satisfied: blinker<2,>=1.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (1.7.0)\n",
243
+ "Collecting cachetools<6,>=4.0 (from streamlit==1.32.2)\n",
244
+ " Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)\n",
245
+ "Requirement already satisfied: click<9,>=7.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (8.1.7)\n",
246
+ "Requirement already satisfied: numpy<2,>=1.19.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (1.22.4)\n",
247
+ "Collecting packaging<24,>=16.8 (from streamlit==1.32.2)\n",
248
+ " Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\n",
249
+ "Requirement already satisfied: pandas<3,>=1.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (2.2.2)\n",
250
+ "Requirement already satisfied: pillow<11,>=7.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (10.2.0)\n",
251
+ "Requirement already satisfied: protobuf<5,>=3.20 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.25.3)\n",
252
+ "Requirement already satisfied: pyarrow>=7.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (15.0.2)\n",
253
+ "Requirement already satisfied: requests<3,>=2.27 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (2.31.0)\n",
254
+ "Requirement already satisfied: rich<14,>=10.14.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (13.7.1)\n",
255
+ "Requirement already satisfied: tenacity<9,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (8.2.3)\n",
256
+ "Requirement already satisfied: toml<2,>=0.10.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (0.10.2)\n",
257
+ "Requirement already satisfied: typing-extensions<5,>=4.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.10.0)\n",
258
+ "Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit==1.32.2)\n",
259
+ " Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n",
260
+ "Collecting pydeck<1,>=0.8.0b4 (from streamlit==1.32.2)\n",
261
+ " Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)\n",
262
+ "Requirement already satisfied: tornado<7,>=6.0.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (6.4)\n",
263
+ "Requirement already satisfied: watchdog>=2.1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from streamlit==1.32.2) (4.0.0)\n",
264
+ "Requirement already satisfied: jinja2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (3.1.3)\n",
265
+ "Requirement already satisfied: jsonschema>=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (4.21.1)\n",
266
+ "Requirement already satisfied: toolz in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6,>=4.0->streamlit==1.32.2) (0.12.1)\n",
267
+ "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit==1.32.2)\n",
268
+ " Using cached gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n",
269
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2.9.0)\n",
270
+ "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2024.1)\n",
271
+ "Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3,>=1.3.0->streamlit==1.32.2) (2024.1)\n"
272
+ ]
273
+ },
274
+ {
275
+ "name": "stdout",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (3.3.2)\n",
279
+ "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (3.6)\n",
280
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (2.0.7)\n",
281
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2.27->streamlit==1.32.2) (2024.2.2)\n",
282
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich<14,>=10.14.0->streamlit==1.32.2) (3.0.0)\n",
283
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich<14,>=10.14.0->streamlit==1.32.2) (2.17.2)\n",
284
+ "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit==1.32.2)\n",
285
+ " Using cached smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n",
286
+ "Requirement already satisfied: MarkupSafe>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jinja2->altair<6,>=4.0->streamlit==1.32.2) (2.1.5)\n",
287
+ "Requirement already satisfied: attrs>=22.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (23.2.0)\n",
288
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (2023.12.1)\n",
289
+ "Requirement already satisfied: referencing>=0.28.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (0.34.0)\n",
290
+ "Requirement already satisfied: rpds-py>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit==1.32.2) (0.18.0)\n",
291
+ "Requirement already satisfied: mdurl~=0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich<14,>=10.14.0->streamlit==1.32.2) (0.1.2)\n",
292
+ "Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas<3,>=1.3.0->streamlit==1.32.2) (1.16.0)\n",
293
+ "Downloading streamlit-1.32.2-py2.py3-none-any.whl (8.1 MB)\n",
294
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.1/8.1 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
295
+ "\u001b[?25hDownloading altair-5.3.0-py3-none-any.whl (857 kB)\n",
296
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m857.8/857.8 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
297
+ "\u001b[?25hDownloading cachetools-5.3.3-py3-none-any.whl (9.3 kB)\n",
298
+ "Using cached GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
299
+ "Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
300
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
301
+ "\u001b[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)\n",
302
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
303
+ "\u001b[?25hUsing cached gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
304
+ "Using cached smmap-5.0.1-py3-none-any.whl (24 kB)\n",
305
+ "Installing collected packages: smmap, packaging, cachetools, pydeck, gitdb, gitpython, altair, streamlit\n",
306
+ " Attempting uninstall: packaging\n",
307
+ " Found existing installation: packaging 24.1\n",
308
+ " Uninstalling packaging-24.1:\n",
309
+ " Successfully uninstalled packaging-24.1\n",
310
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
311
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
312
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
313
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
314
+ "\u001b[0mSuccessfully installed altair-5.3.0 cachetools-5.3.3 gitdb-4.0.11 gitpython-3.1.43 packaging-23.2 pydeck-0.9.1 smmap-5.0.1 streamlit-1.32.2\n",
315
+ "Collecting requests==2.28.1\n",
316
+ " Downloading requests-2.28.1-py3-none-any.whl.metadata (4.6 kB)\n",
317
+ "Collecting charset-normalizer<3,>=2 (from requests==2.28.1)\n",
318
+ " Downloading charset_normalizer-2.1.1-py3-none-any.whl.metadata (11 kB)\n",
319
+ "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests==2.28.1) (3.6)\n",
320
+ "Collecting urllib3<1.27,>=1.21.1 (from requests==2.28.1)\n",
321
+ " Downloading urllib3-1.26.19-py2.py3-none-any.whl.metadata (49 kB)\n",
322
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
323
+ "\u001b[?25hRequirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests==2.28.1) (2024.2.2)\n",
324
+ "Downloading requests-2.28.1-py3-none-any.whl (62 kB)\n",
325
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
326
+ "\u001b[?25hDownloading charset_normalizer-2.1.1-py3-none-any.whl (39 kB)\n",
327
+ "Downloading urllib3-1.26.19-py2.py3-none-any.whl (143 kB)\n",
328
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.9/143.9 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
329
+ "\u001b[?25hInstalling collected packages: urllib3, charset-normalizer, requests\n",
330
+ " Attempting uninstall: urllib3\n",
331
+ " Found existing installation: urllib3 2.0.7\n",
332
+ " Uninstalling urllib3-2.0.7:\n",
333
+ " Successfully uninstalled urllib3-2.0.7\n",
334
+ " Attempting uninstall: charset-normalizer\n",
335
+ " Found existing installation: charset-normalizer 3.3.2\n",
336
+ " Uninstalling charset-normalizer-3.3.2:\n",
337
+ " Successfully uninstalled charset-normalizer-3.3.2\n",
338
+ " Attempting uninstall: requests\n",
339
+ " Found existing installation: requests 2.31.0\n",
340
+ " Uninstalling requests-2.31.0:\n",
341
+ " Successfully uninstalled requests-2.31.0\n",
342
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
343
+ "jupyterlab-server 2.25.4 requires requests>=2.31, but you have requests 2.28.1 which is incompatible.\n",
344
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
345
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
346
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
347
+ "\u001b[0mSuccessfully installed charset-normalizer-2.1.1 requests-2.28.1 urllib3-1.26.19\n",
348
+ "Collecting python-docx==0.8.11\n",
349
+ " Downloading python-docx-0.8.11.tar.gz (5.6 MB)\n",
350
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m25.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
351
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
352
+ "\u001b[?25hRequirement already satisfied: lxml>=2.3.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-docx==0.8.11) (5.2.2)\n",
353
+ "Building wheels for collected packages: python-docx\n",
354
+ " Building wheel for python-docx (setup.py) ... \u001b[?25ldone\n",
355
+ "\u001b[?25h Created wheel for python-docx: filename=python_docx-0.8.11-py3-none-any.whl size=184488 sha256=1beea9ffacd183f8637c48e55e4ecb5a01557173f2b87cf7bee13b186b97942e\n",
356
+ " Stored in directory: /home/ec2-user/.cache/pip/wheels/80/27/06/837436d4c3bd989b957a91679966f207bfd71d358d63a8194d\n",
357
+ "Successfully built python-docx\n",
358
+ "Installing collected packages: python-docx\n",
359
+ "Successfully installed python-docx-0.8.11\n",
360
+ "Collecting langchain-community\n",
361
+ " Downloading langchain_community-0.2.5-py3-none-any.whl.metadata (2.5 kB)\n",
362
+ "Requirement already satisfied: PyYAML>=5.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (6.0.1)\n",
363
+ "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (2.0.29)\n",
364
+ "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (3.9.3)\n",
365
+ "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.6.7)\n"
366
+ ]
367
+ },
368
+ {
369
+ "name": "stdout",
370
+ "output_type": "stream",
371
+ "text": [
372
+ "Collecting langchain<0.3.0,>=0.2.5 (from langchain-community)\n",
373
+ " Downloading langchain-0.2.5-py3-none-any.whl.metadata (7.0 kB)\n",
374
+ "Requirement already satisfied: langchain-core<0.3.0,>=0.2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.2.8)\n",
375
+ "Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (0.1.79)\n",
376
+ "Requirement already satisfied: numpy<2,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (1.22.4)\n",
377
+ "Requirement already satisfied: requests<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (2.28.1)\n",
378
+ "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-community) (8.2.3)\n",
379
+ "Requirement already satisfied: aiosignal>=1.1.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n",
380
+ "Requirement already satisfied: attrs>=17.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n",
381
+ "Requirement already satisfied: frozenlist>=1.1.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n",
382
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n",
383
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n",
384
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n",
385
+ "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.21.3)\n",
386
+ "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
387
+ "Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain<0.3.0,>=0.2.5->langchain-community) (0.2.1)\n",
388
+ "Requirement already satisfied: pydantic<3,>=1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain<0.3.0,>=0.2.5->langchain-community) (2.7.4)\n",
389
+ "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-core<0.3.0,>=0.2.7->langchain-community) (1.33)\n",
390
+ "Requirement already satisfied: packaging<25,>=23.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langchain-core<0.3.0,>=0.2.7->langchain-community) (23.2)\n",
391
+ "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.0->langchain-community) (3.10.5)\n",
392
+ "Requirement already satisfied: charset-normalizer<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2.1.1)\n",
393
+ "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (3.6)\n",
394
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (1.26.19)\n",
395
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2024.2.2)\n",
396
+ "Requirement already satisfied: typing-extensions>=4.6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain-community) (4.10.0)\n",
397
+ "Requirement already satisfied: greenlet!=0.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n",
398
+ "Requirement already satisfied: jsonpointer>=1.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.7->langchain-community) (2.4)\n",
399
+ "Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.5->langchain-community) (0.7.0)\n",
400
+ "Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.5->langchain-community) (2.18.4)\n",
401
+ "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n",
402
+ "Downloading langchain_community-0.2.5-py3-none-any.whl (2.2 MB)\n",
403
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
404
+ "\u001b[?25hDownloading langchain-0.2.5-py3-none-any.whl (974 kB)\n",
405
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m974.6/974.6 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
406
+ "\u001b[?25hInstalling collected packages: langchain, langchain-community\n",
407
+ " Attempting uninstall: langchain\n",
408
+ " Found existing installation: langchain 0.2.0\n",
409
+ " Uninstalling langchain-0.2.0:\n",
410
+ " Successfully uninstalled langchain-0.2.0\n",
411
+ "Successfully installed langchain-0.2.5 langchain-community-0.2.5\n",
412
+ "Collecting gradio\n",
413
+ " Downloading gradio-4.36.1-py3-none-any.whl.metadata (15 kB)\n",
414
+ "Collecting aiofiles<24.0,>=22.0 (from gradio)\n",
415
+ " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n",
416
+ "Requirement already satisfied: altair<6.0,>=4.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (5.3.0)\n",
417
+ "Collecting fastapi (from gradio)\n",
418
+ " Downloading fastapi-0.111.0-py3-none-any.whl.metadata (25 kB)\n",
419
+ "Collecting ffmpy (from gradio)\n",
420
+ " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n",
421
+ " Preparing metadata (setup.py) ... \u001b[?25ldone\n",
422
+ "\u001b[?25hCollecting gradio-client==1.0.1 (from gradio)\n",
423
+ " Downloading gradio_client-1.0.1-py3-none-any.whl.metadata (7.1 kB)\n",
424
+ "Requirement already satisfied: httpx>=0.24.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (0.26.0)\n",
425
+ "Collecting huggingface-hub>=0.19.3 (from gradio)\n",
426
+ " Downloading huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)\n",
427
+ "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (6.4.0)\n",
428
+ "Requirement already satisfied: jinja2<4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.1.3)\n",
429
+ "Requirement already satisfied: markupsafe~=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.1.5)\n",
430
+ "Requirement already satisfied: matplotlib~=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.8.3)\n",
431
+ "Requirement already satisfied: numpy<3.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (1.22.4)\n",
432
+ "Requirement already satisfied: orjson~=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (3.10.5)\n",
433
+ "Requirement already satisfied: packaging in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (23.2)\n",
434
+ "Requirement already satisfied: pandas<3.0,>=1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.2.2)\n",
435
+ "Requirement already satisfied: pillow<11.0,>=8.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (10.2.0)\n",
436
+ "Requirement already satisfied: pydantic>=2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (2.7.4)\n",
437
+ "Collecting pydub (from gradio)\n",
438
+ " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
439
+ "Collecting python-multipart>=0.0.9 (from gradio)\n",
440
+ " Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n"
441
+ ]
442
+ },
443
+ {
444
+ "name": "stdout",
445
+ "output_type": "stream",
446
+ "text": [
447
+ "Requirement already satisfied: pyyaml<7.0,>=5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (6.0.1)\n",
448
+ "Collecting ruff>=0.2.2 (from gradio)\n",
449
+ " Downloading ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)\n",
450
+ "Collecting semantic-version~=2.0 (from gradio)\n",
451
+ " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
452
+ "Collecting tomlkit==0.12.0 (from gradio)\n",
453
+ " Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
454
+ "Collecting typer<1.0,>=0.12 (from gradio)\n",
455
+ " Downloading typer-0.12.3-py3-none-any.whl.metadata (15 kB)\n",
456
+ "Requirement already satisfied: typing-extensions~=4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio) (4.10.0)\n",
457
+ "Collecting urllib3~=2.0 (from gradio)\n",
458
+ " Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)\n",
459
+ "Collecting uvicorn>=0.14.0 (from gradio)\n",
460
+ " Downloading uvicorn-0.30.1-py3-none-any.whl.metadata (6.3 kB)\n",
461
+ "Requirement already satisfied: fsspec in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from gradio-client==1.0.1->gradio) (2024.3.1)\n",
462
+ "Collecting websockets<12.0,>=10.0 (from gradio-client==1.0.1->gradio)\n",
463
+ " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
464
+ "Requirement already satisfied: jsonschema>=3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio) (4.21.1)\n",
465
+ "Requirement already satisfied: toolz in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n",
466
+ "Requirement already satisfied: anyio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (4.3.0)\n",
467
+ "Requirement already satisfied: certifi in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (2024.2.2)\n",
468
+ "Requirement already satisfied: httpcore==1.* in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (1.0.4)\n",
469
+ "Requirement already satisfied: idna in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (3.6)\n",
470
+ "Requirement already satisfied: sniffio in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpx>=0.24.1->gradio) (1.3.1)\n",
471
+ "Requirement already satisfied: h11<0.15,>=0.13 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n",
472
+ "Requirement already satisfied: filelock in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (3.13.3)\n",
473
+ "Requirement already satisfied: requests in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (2.28.1)\n",
474
+ "Requirement already satisfied: tqdm>=4.42.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from huggingface-hub>=0.19.3->gradio) (4.66.2)\n",
475
+ "Requirement already satisfied: contourpy>=1.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (1.2.0)\n",
476
+ "Requirement already satisfied: cycler>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (0.12.1)\n",
477
+ "Requirement already satisfied: fonttools>=4.22.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (4.50.0)\n",
478
+ "Requirement already satisfied: kiwisolver>=1.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
479
+ "Requirement already satisfied: pyparsing>=2.3.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (3.1.2)\n",
480
+ "Requirement already satisfied: python-dateutil>=2.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (2.9.0)\n",
481
+ "Requirement already satisfied: pytz>=2020.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
482
+ "Requirement already satisfied: tzdata>=2022.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
483
+ "Requirement already satisfied: annotated-types>=0.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2.0->gradio) (0.7.0)\n",
484
+ "Requirement already satisfied: pydantic-core==2.18.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pydantic>=2.0->gradio) (2.18.4)\n",
485
+ "Requirement already satisfied: click>=8.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n",
486
+ "Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n",
487
+ " Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\n",
488
+ "Requirement already satisfied: rich>=10.11.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n",
489
+ "Collecting starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n",
490
+ " Downloading starlette-0.37.2-py3-none-any.whl.metadata (5.9 kB)\n",
491
+ "Collecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n",
492
+ " Downloading fastapi_cli-0.0.4-py3-none-any.whl.metadata (7.0 kB)\n",
493
+ "Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from fastapi->gradio) (5.9.0)\n",
494
+ "Collecting email_validator>=2.0.0 (from fastapi->gradio)\n",
495
+ " Downloading email_validator-2.1.2-py3-none-any.whl.metadata (26 kB)\n",
496
+ "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n",
497
+ " Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)\n",
498
+ "Requirement already satisfied: attrs>=22.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.2.0)\n",
499
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n",
500
+ "Requirement already satisfied: referencing>=0.28.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.34.0)\n",
501
+ "Requirement already satisfied: rpds-py>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.0)\n",
502
+ "Requirement already satisfied: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n",
503
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
504
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.17.2)\n",
505
+ "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from anyio->httpx>=0.24.1->gradio) (1.2.0)\n",
506
+ "Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
507
+ " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
508
+ "Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
509
+ " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n",
510
+ "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
511
+ " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
512
+ "Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi->gradio)\n",
513
+ " Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
514
+ "Requirement already satisfied: charset-normalizer<3,>=2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from requests->huggingface-hub>=0.19.3->gradio) (2.1.1)\n",
515
+ "INFO: pip is looking at multiple versions of requests to determine which version is compatible with other requirements. This could take a while.\n"
516
+ ]
517
+ },
518
+ {
519
+ "name": "stdout",
520
+ "output_type": "stream",
521
+ "text": [
522
+ "Collecting requests (from huggingface-hub>=0.19.3->gradio)\n",
523
+ " Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
524
+ "Requirement already satisfied: mdurl~=0.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n",
525
+ "Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)\n",
526
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━��━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
527
+ "\u001b[?25hDownloading gradio_client-1.0.1-py3-none-any.whl (318 kB)\n",
528
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.1/318.1 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
529
+ "\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
530
+ "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
531
+ "Downloading huggingface_hub-0.23.4-py3-none-any.whl (402 kB)\n",
532
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.6/402.6 kB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
533
+ "\u001b[?25hDownloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n",
534
+ "Downloading ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.8 MB)\n",
535
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m89.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
536
+ "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
537
+ "Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
538
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m767.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
539
+ "\u001b[?25hDownloading urllib3-2.2.2-py3-none-any.whl (121 kB)\n",
540
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
541
+ "\u001b[?25hDownloading uvicorn-0.30.1-py3-none-any.whl (62 kB)\n",
542
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
543
+ "\u001b[?25hDownloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n",
544
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
545
+ "\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
546
+ "Downloading email_validator-2.1.2-py3-none-any.whl (30 kB)\n",
547
+ "Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n",
548
+ "Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
549
+ "Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n",
550
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
551
+ "\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
552
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
553
+ "\u001b[?25hDownloading requests-2.32.3-py3-none-any.whl (64 kB)\n",
554
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
555
+ "\u001b[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n",
556
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
557
+ "\u001b[?25hDownloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n",
558
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
559
+ "\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
560
+ "Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
561
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m27.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
562
+ "\u001b[?25hDownloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
563
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
564
+ "\u001b[?25hBuilding wheels for collected packages: ffmpy\n",
565
+ " Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\n",
566
+ "\u001b[?25h Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=0397b4aac7e8bc2cd10586f4161b48a2c33365d31a941eaaf05df08b9e661664\n",
567
+ " Stored in directory: /home/ec2-user/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n",
568
+ "Successfully built ffmpy\n",
569
+ "Installing collected packages: pydub, ffmpy, websockets, uvloop, uvicorn, urllib3, tomlkit, shellingham, semantic-version, ruff, python-multipart, python-dotenv, httptools, dnspython, aiofiles, watchfiles, starlette, requests, email_validator, typer, huggingface-hub, gradio-client, fastapi-cli, fastapi, gradio\n",
570
+ " Attempting uninstall: urllib3\n",
571
+ " Found existing installation: urllib3 1.26.19\n",
572
+ " Uninstalling urllib3-1.26.19:\n",
573
+ " Successfully uninstalled urllib3-1.26.19\n",
574
+ " Attempting uninstall: tomlkit\n",
575
+ " Found existing installation: tomlkit 0.12.4\n",
576
+ " Uninstalling tomlkit-0.12.4:\n",
577
+ " Successfully uninstalled tomlkit-0.12.4\n",
578
+ " Attempting uninstall: requests\n",
579
+ " Found existing installation: requests 2.28.1\n",
580
+ " Uninstalling requests-2.28.1:\n",
581
+ " Successfully uninstalled requests-2.28.1\n",
582
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
583
+ "botocore 1.31.85 requires urllib3<2.1,>=1.25.4; python_version >= \"3.10\", but you have urllib3 2.2.2 which is incompatible.\n",
584
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
585
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
586
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
587
+ "\u001b[0mSuccessfully installed aiofiles-23.2.1 dnspython-2.6.1 email_validator-2.1.2 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 gradio-4.36.1 gradio-client-1.0.1 httptools-0.6.1 huggingface-hub-0.23.4 pydub-0.25.1 python-dotenv-1.0.1 python-multipart-0.0.9 requests-2.32.3 ruff-0.4.9 semantic-version-2.10.0 shellingham-1.5.4 starlette-0.37.2 tomlkit-0.12.0 typer-0.12.3 urllib3-2.2.2 uvicorn-0.30.1 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n"
588
+ ]
589
+ }
590
+ ],
591
+ "source": [
592
+ "!pip install boto3==1.28.44\n",
593
+ "!pip install docx==0.2.4\n",
594
+ "!pip install httpx[http2]==0.26.0\n",
595
+ "!pip install langchain==0.2.0\n",
596
+ "!pip install openai==1.30.1\n",
597
+ "!pip install pandas==2.2.2\n",
598
+ "!pip install pinecone-client==4.1.0\n",
599
+ "!pip install streamlit==1.32.2\n",
600
+ "!pip install requests==2.28.1\n",
601
+ "!pip install python-docx==0.8.11\n",
602
+ "!pip install langchain-community\n",
603
+ "!pip install gradio"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 73,
609
+ "id": "35eed6ac",
610
+ "metadata": {},
611
+ "outputs": [
612
+ {
613
+ "name": "stdout",
614
+ "output_type": "stream",
615
+ "text": [
616
+ "Total number of document IDs: 9\n"
617
+ ]
618
+ },
619
+ {
620
+ "name": "stderr",
621
+ "output_type": "stream",
622
+ "text": [
623
+ "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
624
+ " \"https://urllib3.readthedocs.io/en/latest/advanced-usage.html\"\n",
625
+ "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.nlp.dev.uptimize.merckgroup.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
626
+ " \"https://urllib3.readthedocs.io/en/latest/advanced-usage.html\"\n"
627
+ ]
628
+ },
629
+ {
630
+ "name": "stdout",
631
+ "output_type": "stream",
632
+ "text": [
633
+ "{'results': [], 'matches': [{'id': 'gtn/input/knowledge_base/AUTOMATED FORECASTING PROCESS 4.docx-1', 'score': 0.879279137, 'values': []}, {'id': 'gtn/input/knowledge_base/automated adjustments process 3.docx-7', 'score': 0.827985406, 'values': []}], 'namespace': '', 'usage': {'readUnits': 5}}\n",
634
+ "Question: What should be the S3 bucket for final Forecast dump exported from SAC?\n",
635
+ "Documents: ['Step:8:- \\nExport the finalized forecast dump from SAP Analytics Cloud(SAC).', 'AUTOMATED ADJUSTMENTS PROCESS\\nStep:1:- \\nLoad the final Forecast dump exported from SAC into S3 bucket with path S3://emd-forecast/gtn/output/SAC_forecast_dump\\nStep:2:- \\nLoad business files into S3 bucket with path S3://emd-forecast/gtn/output/business_files\\nStep:3:- \\nLoad Mapping template for all the families into S3 bucket with path S3://emd-forecast/gtn/output\\nStep:4:- \\nRun Datacleaning.ipynb to convert the business files into readable and understandable format.\\nStep:5:- \\nRun Units_gross_']\n",
636
+ "The S3 bucket for the final Forecast dump exported from SAC should be S3://emd-forecast/gtn/output/SAC_forecast_dump.\n"
637
+ ]
638
+ }
639
+ ],
640
+ "source": [
641
+ "import os\n",
642
+ "from docx import Document\n",
643
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
644
+ "from openai import AzureOpenAI\n",
645
+ "import httpx\n",
646
+ "import requests\n",
647
+ "import json\n",
648
+ "import openai\n",
649
+ "import pinecone\n",
650
+ "from langchain.document_loaders import DirectoryLoader\n",
651
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
652
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
653
+ "from langchain.vectorstores import Pinecone\n",
654
+ "from langchain.llms import OpenAI\n",
655
+ "from langchain.chains.question_answering import load_qa_chain\n",
656
+ "import boto3\n",
657
+ "# Initialize a session using Amazon S3\n",
658
+ "s3 = boto3.client('s3')\n",
659
+ "import io\n",
660
+ "import pinecone\n",
661
+ "from pinecone import Pinecone, ServerlessSpec\n",
662
+ "import requests\n",
663
+ "from pinecone import Pinecone\n",
664
+ "httpx_client = httpx.Client(http2=True, verify='cacert.pem')\n",
665
+ "\n",
666
+ "# Initialize AzureOpenAI client with the HTTP client\n",
667
+ "client = AzureOpenAI(\n",
668
+ " azure_endpoint=\"https://api.nlp.dev.uptimize.merckgroup.com\",\n",
669
+ " api_key='c6140592-6c65-4261-a959-2e2ba099526d',\n",
670
+ " api_version=\"2023-09-01-preview\",\n",
671
+ " http_client=httpx_client\n",
672
+ ")\n",
673
+ "\n",
674
+ "# Initialize Pinecone client\n",
675
+ "pc = Pinecone(\n",
676
+ " api_key=\"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
677
+ ")\n",
678
+ "\n",
679
+ "# Connect to the index\n",
680
+ "index_name = \"ktbot\"\n",
681
+ "index = pc.Index(index_name)\n",
682
+ "\n",
683
+ "# Collect all document IDs with pagination\n",
684
+ "doc_ids = []\n",
685
+ "limit = 90 # Maximum number of vector IDs per request\n",
686
+ "next_token = None\n",
687
+ "\n",
688
+ "while True:\n",
689
+ " response = index.list_paginated(namespace=None, limit=limit, pagination_token=next_token)\n",
690
+ " \n",
691
+ " # Collect IDs from the response\n",
692
+ " doc_ids.extend([v.id for v in response.vectors])\n",
693
+ " \n",
694
+ " # Update the next_token for pagination\n",
695
+ " next_token = response.next_page_token\n",
696
+ " \n",
697
+ " # Break the loop if there is no next token\n",
698
+ " if not next_token:\n",
699
+ " break\n",
700
+ "\n",
701
+ "\n",
702
+ "print(f\"Total number of document IDs: {len(doc_ids)}\")\n",
703
+ "\n",
704
+ "# Construct the API endpoint for fetching embeddings\n",
705
+ "base_url = \"https://ktbot-fshno4r.svc.aped-4627-b74a.pinecone.io\"\n",
706
+ "endpoint = \"/vectors/fetch\"\n",
707
+ "\n",
708
+ "# Prepare headers\n",
709
+ "api_key = \"8f0e6749-4859-4eb8-a8cf-4e2e72cf4bc1\"\n",
710
+ "headers = {\n",
711
+ " \"Content-Type\": \"application/json\",\n",
712
+ " \"Api-Key\": api_key\n",
713
+ "}\n",
714
+ "\n",
715
+ "# Dictionary to store the full text for each document chunk\n",
716
+ "doc_text_dict = {}\n",
717
+ "\n",
718
+ "# Function to fetch embedding for a given document ID\n",
719
+ "def fetch_metadata(doc_id):\n",
720
+ " url = f\"{base_url}{endpoint}?ids={doc_id}\"\n",
721
+ " try:\n",
722
+ " # Send the GET request to fetch metadata\n",
723
+ " response = requests.get(url, headers=headers)\n",
724
+ " response.raise_for_status() # Raise an exception for HTTP errors\n",
725
+ "\n",
726
+ " # Check HTTP response status\n",
727
+ " if response.status_code == 200:\n",
728
+ " # Attempt to parse JSON response\n",
729
+ " try:\n",
730
+ " data = response.json()\n",
731
+ " if \"vectors\" in data and doc_id in data[\"vectors\"]:\n",
732
+ " vector_data = data[\"vectors\"][doc_id]\n",
733
+ " return vector_data.get('metadata', {}).get('text', '') # Return the text from metadata\n",
734
+ " else:\n",
735
+ " print(f\"No metadata found for document ID: {doc_id}\")\n",
736
+ " return None\n",
737
+ " except ValueError:\n",
738
+ " print(\"Response is not valid JSON.\")\n",
739
+ " print(\"Response content:\", response.text)\n",
740
+ " return None\n",
741
+ " else:\n",
742
+ " print(f\"Failed to fetch metadata: {response.status_code} - {response.text}\")\n",
743
+ " return None\n",
744
+ "\n",
745
+ " except requests.exceptions.RequestException as e:\n",
746
+ " print(f\"Error fetching document: {e}\")\n",
747
+ " return None\n",
748
+ "\n",
749
+ "# Fetch metadata for all document IDs and populate the doc_text_dict\n",
750
+ "for doc_id in doc_ids:\n",
751
+ " text = fetch_metadata(doc_id)\n",
752
+ " if text:\n",
753
+ " doc_text_dict[doc_id] = text\n",
754
+ "\n",
755
+ "\n",
756
+ "\n",
757
+ "# Preprocess the id to extract a consistent format\n",
758
+ "# Preprocess the id to extract a consistent format\n",
759
+ "def preprocess_id(id_str):\n",
760
+ " if id_str.startswith(\"page_content=\"):\n",
761
+ " return id_str.split(\"=\")[1].strip(\"'\")\n",
762
+ " else:\n",
763
+ " return id_str\n",
764
+ "\n",
765
+ "\n",
766
+ "\n",
767
+ "# Update the get_similar_docs function to preprocess the id before retrieval\n",
768
+ "def get_similar_docs(query, k=2, score=False):\n",
769
+ " # Generate the embedding for the query\n",
770
+ " query_embedding_response = client.embeddings.create(input=query, model=\"text-embedding-ada-002-v2\")\n",
771
+ " query_embedding = query_embedding_response.data[0].embedding\n",
772
+ " \n",
773
+ " # Search the Pinecone index for similar documents\n",
774
+ " query_payload = {\n",
775
+ " \"top_k\": k,\n",
776
+ " \"include_values\": score,\n",
777
+ " \"vector\": query_embedding\n",
778
+ " }\n",
779
+ " query_endpoint = f\"{base_url}/query\"\n",
780
+ " query_response = requests.post(query_endpoint, headers=headers, json=query_payload, verify=False)\n",
781
+ " \n",
782
+ " # Extract and return the similar documents\n",
783
+ " if query_response.status_code == 200:\n",
784
+ " search_results = query_response.json()\n",
785
+ " print(search_results)\n",
786
+ " similar_docs = [doc_text_dict[preprocess_id(match['id'])] for match in search_results['matches']]\n",
787
+ " return similar_docs\n",
788
+ " else:\n",
789
+ " print(f\"Failed to retrieve similar documents: {query_response.status_code} - {query_response.reason}\")\n",
790
+ " print(\"Response content:\", query_response.text)\n",
791
+ " return []\n",
792
+ "\n",
793
+ "\n",
794
+ "def get_answer(query):\n",
795
+ " similar_docs = get_similar_docs(query)\n",
796
+ " combined_message = f\"Question: {query}\\nDocuments: {similar_docs}\"\n",
797
+ " print(combined_message)\n",
798
+ " openai_key = 'c6140592-6c65-4261-a959-2e2ba099526d'\n",
799
+ " openai_url = \"https://api.nlp.dev.uptimize.merckgroup.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-09-01-preview\"\n",
800
+ " headers = {\n",
801
+ " \"Content-Type\": \"application/json\",\n",
802
+ " \"x-api-key\": openai_key,\n",
803
+ " \"region\": \"EU\"\n",
804
+ " }\n",
805
+ " \n",
806
+ " payload = {\n",
807
+ " \"model\": \"gpt-4-turbo\",\n",
808
+ " \"messages\": [\n",
809
+ " {\"role\": \"user\", \"content\": combined_message}\n",
810
+ " ],\n",
811
+ " \"max_tokens\": 1024,\n",
812
+ " \"n\": 1,\n",
813
+ " \"temperature\": 0\n",
814
+ " }\n",
815
+ "\n",
816
+ " try:\n",
817
+ " response = requests.post(openai_url, headers=headers, data=json.dumps(payload), verify=False)\n",
818
+ " response.raise_for_status() # Raise an exception for HTTP errors\n",
819
+ " ChatGPT_reply = response.json()[\"choices\"][0][\"message\"][\"content\"]\n",
820
+ " return ChatGPT_reply\n",
821
+ " except requests.exceptions.RequestException as e:\n",
822
+ " print(\"ERROR\")\n",
823
+ " print(e)\n",
824
+ " raise Exception(f'Request failed: {e}')\n",
825
+ "\n",
826
+ "# Example usage:\n",
827
+ "query = \"What should be the S3 bucket for final Forecast dump exported from SAC?\"\n",
828
+ "answer = get_answer(query)\n",
829
+ "print(answer)\n"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 75,
835
+ "id": "27ed9037",
836
+ "metadata": {},
837
+ "outputs": [
838
+ {
839
+ "name": "stdout",
840
+ "output_type": "stream",
841
+ "text": [
842
+ "Running on local URL: http://127.0.0.1:7860\n",
843
+ "Running on public URL: https://0e7b0972f54b527958.gradio.live\n",
844
+ "\n",
845
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
846
+ ]
847
+ },
848
+ {
849
+ "data": {
850
+ "text/html": [
851
+ "<div><iframe src=\"https://0e7b0972f54b527958.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
852
+ ],
853
+ "text/plain": [
854
+ "<IPython.core.display.HTML object>"
855
+ ]
856
+ },
857
+ "metadata": {},
858
+ "output_type": "display_data"
859
+ },
860
+ {
861
+ "name": "stdout",
862
+ "output_type": "stream",
863
+ "text": [
864
+ "Keyboard interruption in main thread... closing server.\n",
865
+ "Killing tunnel 127.0.0.1:7860 <> https://0e7b0972f54b527958.gradio.live\n"
866
+ ]
867
+ },
868
+ {
869
+ "ename": "TypeError",
870
+ "evalue": "EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument '_js'",
871
+ "output_type": "error",
872
+ "traceback": [
873
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
874
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
875
+ "Cell \u001b[0;32mIn[75], line 49\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;66;03m# Apply custom CSS using JavaScript\u001b[39;00m\n\u001b[1;32m 48\u001b[0m block\u001b[38;5;241m.\u001b[39mlaunch(debug\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, inbrowser\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,share\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 49\u001b[0m \u001b[43mblock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_js\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;43m (function() \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;43m var style = document.createElement(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstyle\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;124;43m style.innerHTML = `\u001b[39;49m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;124;43m #message-box textarea \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;124;43m border: 1px solid black !important;\u001b[39;49m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;124;43m }\u001b[39;49m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;124;43m `;\u001b[39;49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;124;43m document.head.appendChild(style);\u001b[39;49m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;124;43m })();\u001b[39;49m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
876
+ "\u001b[0;31mTypeError\u001b[0m: EventListener._setup.<locals>.event_trigger() got an unexpected keyword argument '_js'"
877
+ ]
878
+ }
879
+ ],
880
+ "source": [
881
+ "import base64\n",
882
+ "import gradio as gr\n",
883
+ "bucket_name='emd-forecast'\n",
884
+ "file_key = 'gtn/input/logo_ktbot.PNG'\n",
885
+ "\n",
886
+ "# Download the file from S3\n",
887
+ "response = s3.get_object(Bucket=bucket_name, Key=file_key)\n",
888
+ "file_content = response['Body'].read()\n",
889
+ "\n",
890
+ "# Encode the file content in base64\n",
891
+ "png_base64 = base64.b64encode(file_content).decode('utf-8')\n",
892
+ "png_data_url = f\"data:image/png;base64,{png_base64}\"\n",
893
+ "\n",
894
+ "\n",
895
+ "\n",
896
+ "\n",
897
+ "def message_and_history(input, history):\n",
898
+ " history = history or []\n",
899
+ " s = list(sum(history, ()))\n",
900
+ " s.append(input)\n",
901
+ " inp = ' '.join(s)\n",
902
+ " output = get_answer(inp)\n",
903
+ " history.append((input, output))\n",
904
+ " return history, history\n",
905
+ "\n",
906
+ "\n",
907
+ "block = gr.Blocks(theme=gr.themes.Soft())\n",
908
+ "\n",
909
+ "with block:\n",
910
+ " with gr.Row():\n",
911
+ " with gr.Column(scale=1, min_width=300):\n",
912
+ " gr.Markdown(f\"\"\"\n",
913
+ " <div style=\"display: flex; align-items: center; justify-content: center; height: calc(100% - 50px);\">\n",
914
+ " <img src=\"{png_data_url}\" alt=\"Logo\" style=\"height: auto; max-height: 100%;\">\n",
915
+ " </div>\n",
916
+ "\n",
917
+ " \"\"\")\n",
918
+ " with gr.Column(scale=2):\n",
919
+ " chatbot = gr.Chatbot(label=\"KT Bot\")\n",
920
+ " message = gr.Textbox(placeholder=\"Your KT bot, ask me anything...\",label=\"Query\")\n",
921
+ " state = gr.State()\n",
922
+ " submit = gr.Button(\"SEND\")\n",
923
+ " submit.click(message_and_history, \n",
924
+ " inputs=[message, state], \n",
925
+ " outputs=[chatbot, state])\n",
926
+ "\n",
927
+ "# Apply custom CSS using JavaScript\n",
928
+ "block.launch(debug=True, inbrowser=True,share=True)\n",
929
+ "block.load(None, None, None, _js=\"\"\"\n",
930
+ " (function() {\n",
931
+ " var style = document.createElement('style');\n",
932
+ " style.innerHTML = `\n",
933
+ " #message-box textarea {\n",
934
+ " border: 1px solid black !important;\n",
935
+ " }\n",
936
+ " `;\n",
937
+ " document.head.appendChild(style);\n",
938
+ " })();\n",
939
+ "\"\"\")\n"
940
+ ]
941
+ },
942
+ {
943
+ "cell_type": "code",
944
+ "execution_count": 8,
945
+ "id": "2ed00191",
946
+ "metadata": {},
947
+ "outputs": [
948
+ {
949
+ "name": "stdout",
950
+ "output_type": "stream",
951
+ "text": [
952
+ "Requirement already satisfied: urllib3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (1.26.18)\n",
953
+ "Collecting urllib3\n",
954
+ " Using cached urllib3-2.2.1-py3-none-any.whl.metadata (6.4 kB)\n",
955
+ "Requirement already satisfied: pyopenssl in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (24.0.0)\n",
956
+ "Collecting pyopenssl\n",
957
+ " Downloading pyOpenSSL-24.1.0-py3-none-any.whl.metadata (12 kB)\n",
958
+ "Requirement already satisfied: cryptography<43,>=41.0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from pyopenssl) (42.0.5)\n",
959
+ "Requirement already satisfied: cffi>=1.12 in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from cryptography<43,>=41.0.5->pyopenssl) (1.16.0)\n",
960
+ "Requirement already satisfied: pycparser in /home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages (from cffi>=1.12->cryptography<43,>=41.0.5->pyopenssl) (2.21)\n",
961
+ "Using cached urllib3-2.2.1-py3-none-any.whl (121 kB)\n",
962
+ "Downloading pyOpenSSL-24.1.0-py3-none-any.whl (56 kB)\n",
963
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
964
+ "\u001b[?25hInstalling collected packages: urllib3, pyopenssl\n",
965
+ " Attempting uninstall: urllib3\n",
966
+ " Found existing installation: urllib3 1.26.18\n",
967
+ " Uninstalling urllib3-1.26.18:\n",
968
+ " Successfully uninstalled urllib3-1.26.18\n",
969
+ " Attempting uninstall: pyopenssl\n",
970
+ " Found existing installation: pyOpenSSL 24.0.0\n",
971
+ " Uninstalling pyOpenSSL-24.0.0:\n",
972
+ " Successfully uninstalled pyOpenSSL-24.0.0\n",
973
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
974
+ "botocore 1.31.85 requires urllib3<2.1,>=1.25.4; python_version >= \"3.10\", but you have urllib3 2.2.1 which is incompatible.\n",
975
+ "requests 2.28.1 requires urllib3<1.27,>=1.21.1, but you have urllib3 2.2.1 which is incompatible.\n",
976
+ "sagemaker 2.219.0 requires boto3<2.0,>=1.33.3, but you have boto3 1.28.44 which is incompatible.\n",
977
+ "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.2.2 which is incompatible.\n",
978
+ "sphinx 7.2.6 requires docutils<0.21,>=0.18.1, but you have docutils 0.16 which is incompatible.\u001b[0m\u001b[31m\n",
979
+ "\u001b[0mSuccessfully installed pyopenssl-24.1.0 urllib3-2.2.1\n"
980
+ ]
981
+ }
982
+ ],
983
+ "source": [
984
+ "!pip install --upgrade urllib3 pyopenssl\n"
985
+ ]
986
+ },
987
+ {
988
+ "cell_type": "code",
989
+ "execution_count": null,
990
+ "id": "b80e67ff",
991
+ "metadata": {},
992
+ "outputs": [],
993
+ "source": [
994
+ "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"
995
+ ]
996
+ },
997
+ {
998
+ "cell_type": "code",
999
+ "execution_count": 79,
1000
+ "id": "0751314c",
1001
+ "metadata": {},
1002
+ "outputs": [
1003
+ {
1004
+ "name": "stdout",
1005
+ "output_type": "stream",
1006
+ "text": [
1007
+ "Need \u001b[32m'write'\u001b[0m access token to create a Spaces repo.\n",
1008
+ "\n",
1009
+ " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
1010
+ " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
1011
+ " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
1012
+ " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
1013
+ " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
1014
+ "\n",
1015
+ " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
1016
+ "Enter your token (input will not be visible): \n",
1017
+ "\u001b[31mAborted.\u001b[0m\n"
1018
+ ]
1019
+ }
1020
+ ],
1021
+ "source": [
1022
+ "!gradio deploy"
1023
+ ]
1024
+ },
1025
+ {
1026
+ "cell_type": "code",
1027
+ "execution_count": null,
1028
+ "id": "870ba009",
1029
+ "metadata": {},
1030
+ "outputs": [
1031
+ {
1032
+ "name": "stdout",
1033
+ "output_type": "stream",
1034
+ "text": [
1035
+ "Need \u001b[32m'write'\u001b[0m access token to create a Spaces repo.\r\n",
1036
+ "\r\n",
1037
+ " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\r\n",
1038
+ " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\r\n",
1039
+ " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\r\n",
1040
+ " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\r\n",
1041
+ " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\r\n",
1042
+ "\r\n",
1043
+ " A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\r\n",
1044
+ " Setting a new token will erase the existing one.\r\n",
1045
+ " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\r\n",
1046
+ "Enter your token (input will not be visible): "
1047
+ ]
1048
+ }
1049
+ ],
1050
+ "source": [
1051
+ "!gradio deploy\n"
1052
+ ]
1053
+ },
1054
+ {
1055
+ "cell_type": "code",
1056
+ "execution_count": null,
1057
+ "id": "ec155eeb",
1058
+ "metadata": {},
1059
+ "outputs": [],
1060
+ "source": []
1061
+ }
1062
+ ],
1063
+ "metadata": {
1064
+ "kernelspec": {
1065
+ "display_name": "conda_python3",
1066
+ "language": "python",
1067
+ "name": "conda_python3"
1068
+ },
1069
+ "language_info": {
1070
+ "codemirror_mode": {
1071
+ "name": "ipython",
1072
+ "version": 3
1073
+ },
1074
+ "file_extension": ".py",
1075
+ "mimetype": "text/x-python",
1076
+ "name": "python",
1077
+ "nbconvert_exporter": "python",
1078
+ "pygments_lexer": "ipython3",
1079
+ "version": "3.10.14"
1080
+ }
1081
+ },
1082
+ "nbformat": 4,
1083
+ "nbformat_minor": 5
1084
+ }
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Knowledgebot
3
- emoji: πŸ†
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: knowledgebot
3
+ app_file: KTBOT.ipynb
 
 
4
  sdk: gradio
5
  sdk_version: 4.36.1
 
 
6
  ---
 
 
cacert.pem ADDED
The diff for this file is too large to render. See raw diff