Spaces:
Sleeping
Sleeping
Fixed transactions rag jupyter notebook errors
Browse files
app/categorization/categorizer.py
CHANGED
@@ -3,6 +3,7 @@ import re
|
|
3 |
import ast
|
4 |
import json
|
5 |
import logging
|
|
|
6 |
from typing import Any, List, Tuple, Optional, Dict, Union
|
7 |
|
8 |
# Third-party library imports
|
@@ -14,12 +15,12 @@ from tenacity import retry, wait_random_exponential, stop_after_attempt
|
|
14 |
from pydantic import ValidationError
|
15 |
|
16 |
# Local application/library specific imports
|
17 |
-
from
|
18 |
from langchain.chains import LLMChain
|
19 |
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
|
20 |
from langchain.prompts import PromptTemplate
|
21 |
-
import template as
|
22 |
-
from config import CATEGORY_REFERENCE_OUTPUT_FILE, TX_PER_LLM_RUN
|
23 |
|
24 |
|
25 |
def fuzzy_match_list_categorizer(
|
@@ -68,12 +69,14 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
68 |
"""
|
69 |
|
70 |
# Initialize language model and prompt
|
71 |
-
|
72 |
-
|
|
|
|
|
73 |
chain = LLMChain(llm=llm, prompt=prompt)
|
74 |
|
75 |
# Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
|
76 |
-
tasks = [llm_sublist_categorizer(tx_list.attrs['file_name'], chain=chain, tx_descriptions="\n".join(chunk['description']).strip())
|
77 |
for chunk in np.array_split(tx_list, tx_list.shape[0] // TX_PER_LLM_RUN + 1)]
|
78 |
|
79 |
# Gather results and extract (valid) outputs
|
@@ -87,7 +90,7 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
87 |
valid_outputs = [output for valid_result in valid_results for output in valid_result]
|
88 |
|
89 |
# Return a DataFrame with the valid outputs
|
90 |
-
return pd.DataFrame(valid_outputs, columns=['description', 'category'])
|
91 |
|
92 |
|
93 |
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
|
|
3 |
import ast
|
4 |
import json
|
5 |
import logging
|
6 |
+
import os
|
7 |
from typing import Any, List, Tuple, Optional, Dict, Union
|
8 |
|
9 |
# Third-party library imports
|
|
|
15 |
from pydantic import ValidationError
|
16 |
|
17 |
# Local application/library specific imports
|
18 |
+
from langchain_openai import ChatOpenAI
|
19 |
from langchain.chains import LLMChain
|
20 |
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
|
21 |
from langchain.prompts import PromptTemplate
|
22 |
+
import app.categorization.template as CATEGORY_TEMPLATE
|
23 |
+
from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE, TX_PER_LLM_RUN
|
24 |
|
25 |
|
26 |
def fuzzy_match_list_categorizer(
|
|
|
69 |
"""
|
70 |
|
71 |
# Initialize language model and prompt
|
72 |
+
# openai_api_key = os.environ['OPENAI_API_KEY']
|
73 |
+
# print("apikey: " + openai_api_key)
|
74 |
+
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125", api_key="sk-codepath-group-project-3WwlG0qG8GDG7SMVjgFLT3BlbkFJOHIlVsq0UXFqaOV7hl3O")
|
75 |
+
prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
|
76 |
chain = LLMChain(llm=llm, prompt=prompt)
|
77 |
|
78 |
# Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
|
79 |
+
tasks = [llm_sublist_categorizer(tx_list.attrs['file_name'], chain=chain, tx_descriptions="\n".join(chunk['name/description']).strip())
|
80 |
for chunk in np.array_split(tx_list, tx_list.shape[0] // TX_PER_LLM_RUN + 1)]
|
81 |
|
82 |
# Gather results and extract (valid) outputs
|
|
|
90 |
valid_outputs = [output for valid_result in valid_results for output in valid_result]
|
91 |
|
92 |
# Return a DataFrame with the valid outputs
|
93 |
+
return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
|
94 |
|
95 |
|
96 |
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
app/categorization/categorizer_list.py
CHANGED
@@ -6,8 +6,8 @@ from datetime import datetime
|
|
6 |
import pandas as pd
|
7 |
|
8 |
# Local application/library specific imports
|
9 |
-
from config import CATEGORY_REFERENCE_OUTPUT_FILE
|
10 |
-
from categorizer import llm_list_categorizer, fuzzy_match_list_categorizer
|
11 |
|
12 |
|
13 |
async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
|
@@ -29,14 +29,14 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
29 |
if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
|
30 |
# Read description-category pairs from the reference file
|
31 |
description_category_pairs = pd.read_csv(
|
32 |
-
CATEGORY_REFERENCE_OUTPUT_FILE, header=None, names=['description', 'category']
|
33 |
)
|
34 |
|
35 |
# Extract only descriptions for faster matching
|
36 |
-
descriptions = description_category_pairs['description'].values
|
37 |
|
38 |
# Use fuzzy matching to find similar descriptions and assign the category
|
39 |
-
tx_list['category'] = tx_list['description'].apply(
|
40 |
fuzzy_match_list_categorizer,
|
41 |
args=(descriptions, description_category_pairs),
|
42 |
)
|
@@ -44,14 +44,14 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
44 |
# Filter out uncategorized transactions, deduplicate, and sort by description
|
45 |
uncategorized_descriptions = (
|
46 |
tx_list[tx_list['category'].isnull()]
|
47 |
-
.drop_duplicates(subset=['description'])
|
48 |
-
.sort_values(by=['description'])
|
49 |
)
|
50 |
|
51 |
# Ask the language model to categorize the remaining descriptions
|
52 |
if not uncategorized_descriptions.empty:
|
53 |
categorized_descriptions = await llm_list_categorizer(
|
54 |
-
uncategorized_descriptions[['description', 'category']]
|
55 |
)
|
56 |
|
57 |
categorized_descriptions.dropna(inplace=True)
|
@@ -59,8 +59,8 @@ async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
59 |
# Update the category for uncategorized transactions based on the language model results
|
60 |
if not categorized_descriptions.empty:
|
61 |
tx_list['category'] = tx_list['category'].fillna(
|
62 |
-
tx_list['description'].map(
|
63 |
-
categorized_descriptions.set_index('description')['category']
|
64 |
)
|
65 |
)
|
66 |
|
|
|
6 |
import pandas as pd
|
7 |
|
8 |
# Local application/library specific imports
|
9 |
+
from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE
|
10 |
+
from app.categorization.categorizer import llm_list_categorizer, fuzzy_match_list_categorizer
|
11 |
|
12 |
|
13 |
async def categorize_list(tx_list: pd.DataFrame) -> pd.DataFrame:
|
|
|
29 |
if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
|
30 |
# Read description-category pairs from the reference file
|
31 |
description_category_pairs = pd.read_csv(
|
32 |
+
CATEGORY_REFERENCE_OUTPUT_FILE, header=None, names=['name/description', 'category']
|
33 |
)
|
34 |
|
35 |
# Extract only descriptions for faster matching
|
36 |
+
descriptions = description_category_pairs['name/description'].values
|
37 |
|
38 |
# Use fuzzy matching to find similar descriptions and assign the category
|
39 |
+
tx_list['category'] = tx_list['name/description'].apply(
|
40 |
fuzzy_match_list_categorizer,
|
41 |
args=(descriptions, description_category_pairs),
|
42 |
)
|
|
|
44 |
# Filter out uncategorized transactions, deduplicate, and sort by description
|
45 |
uncategorized_descriptions = (
|
46 |
tx_list[tx_list['category'].isnull()]
|
47 |
+
.drop_duplicates(subset=['name/description'])
|
48 |
+
.sort_values(by=['name/description'])
|
49 |
)
|
50 |
|
51 |
# Ask the language model to categorize the remaining descriptions
|
52 |
if not uncategorized_descriptions.empty:
|
53 |
categorized_descriptions = await llm_list_categorizer(
|
54 |
+
uncategorized_descriptions[['name/description', 'category']]
|
55 |
)
|
56 |
|
57 |
categorized_descriptions.dropna(inplace=True)
|
|
|
59 |
# Update the category for uncategorized transactions based on the language model results
|
60 |
if not categorized_descriptions.empty:
|
61 |
tx_list['category'] = tx_list['category'].fillna(
|
62 |
+
tx_list['name/description'].map(
|
63 |
+
categorized_descriptions.set_index('name/description')['category']
|
64 |
)
|
65 |
)
|
66 |
|
app/categorization/file_processing.py
CHANGED
@@ -9,8 +9,8 @@ from datetime import datetime
|
|
9 |
import pandas as pd
|
10 |
from dateparser import parse
|
11 |
|
12 |
-
from categorizer_list import categorize_list
|
13 |
-
from config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE
|
14 |
|
15 |
# Read file and process it (e.g. categorize transactions)
|
16 |
async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
|
|
|
9 |
import pandas as pd
|
10 |
from dateparser import parse
|
11 |
|
12 |
+
from app.categorization.categorizer_list import categorize_list
|
13 |
+
from app.categorization.config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE
|
14 |
|
15 |
# Read file and process it (e.g. categorize transactions)
|
16 |
async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
|
app/transactions_rag/categorize_transactions.ipynb
CHANGED
@@ -134,40 +134,411 @@
|
|
134 |
},
|
135 |
{
|
136 |
"cell_type": "code",
|
137 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
"metadata": {},
|
139 |
"outputs": [
|
140 |
{
|
141 |
-
"
|
142 |
-
"
|
143 |
-
"
|
144 |
-
|
145 |
-
"\
|
146 |
-
"
|
147 |
-
"
|
148 |
-
"\u001b[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
]
|
150 |
}
|
151 |
],
|
152 |
"source": [
|
153 |
"# Process the transactions csv and get the results from the categorization llm utility\n",
|
154 |
-
"from categorization.file_processing import process_file, save_results\n",
|
155 |
-
"from
|
|
|
156 |
"import asyncio\n",
|
|
|
|
|
|
|
157 |
"\n",
|
158 |
-
"
|
|
|
159 |
"\n",
|
160 |
"async def apply_categorization():\n",
|
161 |
-
" processed_file = process_file(\"transactions_2024.csv\")\n",
|
162 |
"\n",
|
163 |
" print(\"\\nProcessing file\")\n",
|
164 |
" result = await asyncio.gather(processed_file)\n",
|
165 |
"\n",
|
166 |
-
" save_results(
|
167 |
-
" print(
|
168 |
-
"
|
|
|
|
|
|
|
169 |
"\n",
|
170 |
-
"
|
|
|
|
|
171 |
]
|
172 |
}
|
173 |
],
|
|
|
134 |
},
|
135 |
{
|
136 |
"cell_type": "code",
|
137 |
+
"execution_count": 15,
|
138 |
+
"metadata": {},
|
139 |
+
"outputs": [],
|
140 |
+
"source": [
|
141 |
+
"!python3 -m venv venv"
|
142 |
+
]
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"cell_type": "code",
|
146 |
+
"execution_count": 16,
|
147 |
+
"metadata": {},
|
148 |
+
"outputs": [],
|
149 |
+
"source": [
|
150 |
+
"!source venv/bin/activate"
|
151 |
+
]
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"cell_type": "code",
|
155 |
+
"execution_count": 23,
|
156 |
+
"metadata": {},
|
157 |
+
"outputs": [
|
158 |
+
{
|
159 |
+
"name": "stdout",
|
160 |
+
"output_type": "stream",
|
161 |
+
"text": [
|
162 |
+
"Defaulting to user installation because normal site-packages is not writeable\n",
|
163 |
+
"Requirement already satisfied: setuptools in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (70.0.0)\n"
|
164 |
+
]
|
165 |
+
}
|
166 |
+
],
|
167 |
+
"source": [
|
168 |
+
"!pip3 install --upgrade setuptools"
|
169 |
+
]
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": 2,
|
174 |
+
"metadata": {},
|
175 |
+
"outputs": [
|
176 |
+
{
|
177 |
+
"data": {
|
178 |
+
"text/plain": [
|
179 |
+
"['Defaulting to user installation because normal site-packages is not writeable',\n",
|
180 |
+
" 'Requirement already satisfied: langchain in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 1)) (0.2.1)',\n",
|
181 |
+
" 'Collecting langchain_openai (from -r requirements.txt (line 2))',\n",
|
182 |
+
" ' Downloading langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)',\n",
|
183 |
+
" 'Collecting python-dotenv (from -r requirements.txt (line 3))',\n",
|
184 |
+
" ' Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)',\n",
|
185 |
+
" 'Collecting openai (from -r requirements.txt (line 4))',\n",
|
186 |
+
" ' Using cached openai-1.30.4-py3-none-any.whl.metadata (21 kB)',\n",
|
187 |
+
" 'Requirement already satisfied: tenacity in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 5)) (8.3.0)',\n",
|
188 |
+
" 'Requirement already satisfied: rapidfuzz in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 6)) (3.9.1)',\n",
|
189 |
+
" 'Requirement already satisfied: pydantic in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 7)) (2.7.1)',\n",
|
190 |
+
" 'Requirement already satisfied: dateparser in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 8)) (1.2.0)',\n",
|
191 |
+
" 'Requirement already satisfied: pandas in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from -r requirements.txt (line 9)) (2.0.3)',\n",
|
192 |
+
" 'Collecting path (from -r requirements.txt (line 10))',\n",
|
193 |
+
" ' Using cached path-16.14.0-py3-none-any.whl.metadata (6.3 kB)',\n",
|
194 |
+
" 'Collecting aiostream==0.5.2 (from -r requirements.txt (line 12))',\n",
|
195 |
+
" ' Using cached aiostream-0.5.2-py3-none-any.whl.metadata (9.9 kB)',\n",
|
196 |
+
" 'Collecting cachetools==5.3.3 (from -r requirements.txt (line 13))',\n",
|
197 |
+
" ' Using cached cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)',\n",
|
198 |
+
" 'Collecting docx2txt==0.8 (from -r requirements.txt (line 14))',\n",
|
199 |
+
" ' Using cached docx2txt-0.8.tar.gz (2.8 kB)',\n",
|
200 |
+
" ' Preparing metadata (setup.py): started',\n",
|
201 |
+
" \" Preparing metadata (setup.py): finished with status 'done'\",\n",
|
202 |
+
" 'Collecting fastapi==0.109.1 (from -r requirements.txt (line 15))',\n",
|
203 |
+
" ' Using cached fastapi-0.109.1-py3-none-any.whl.metadata (25 kB)',\n",
|
204 |
+
" 'Collecting llama-index-agent-openai==0.2.2 (from -r requirements.txt (line 16))',\n",
|
205 |
+
" ' Using cached llama_index_agent_openai-0.2.2-py3-none-any.whl.metadata (677 bytes)',\n",
|
206 |
+
" 'Collecting llama-index-core==0.10.28 (from -r requirements.txt (line 17))',\n",
|
207 |
+
" ' Using cached llama_index_core-0.10.28-py3-none-any.whl.metadata (3.6 kB)',\n",
|
208 |
+
" 'Collecting llama-index-vector-stores-pinecone==0.1.3 (from -r requirements.txt (line 18))',\n",
|
209 |
+
" ' Using cached llama_index_vector_stores_pinecone-0.1.3-py3-none-any.whl.metadata (725 bytes)',\n",
|
210 |
+
" 'Collecting llama-index==0.10.28 (from -r requirements.txt (line 19))',\n",
|
211 |
+
" ' Using cached llama_index-0.10.28-py3-none-any.whl.metadata (11 kB)',\n",
|
212 |
+
" 'Collecting python-dotenv (from -r requirements.txt (line 3))',\n",
|
213 |
+
" ' Using cached python_dotenv-1.0.0-py3-none-any.whl.metadata (21 kB)',\n",
|
214 |
+
" '\\x1b[31mERROR: Ignored the following versions that require a different python version: 0.0.10 Requires-Python >=3.9,<4.0; 0.0.11 Requires-Python >=3.9,<4.0; 0.0.12 Requires-Python >=3.9,<4.0; 0.0.13 Requires-Python >=3.9,<4.0; 0.0.14 Requires-Python >=3.9,<4.0; 0.0.15 Requires-Python >=3.9,<4.0; 0.0.16 Requires-Python >=3.9,<4.0; 0.0.17 Requires-Python >=3.9,<4.0; 0.0.18 Requires-Python >=3.9,<4.0; 0.0.19 Requires-Python >=3.9,<4.0; 0.0.20 Requires-Python >=3.9,<4.0; 0.0.21 Requires-Python >=3.9,<4.0; 0.0.22 Requires-Python >=3.9,<4.0; 0.0.23 Requires-Python >=3.9,<4.0; 0.0.24 Requires-Python >=3.9,<4.0; 0.0.25 Requires-Python >=3.9,<4.0; 0.14.3 Requires-Python >=3.9,<4; 0.14.4 Requires-Python <4,>=3.9; 0.14.5 Requires-Python <4,>=3.9; 0.15.0 Requires-Python <4,>=3.9; 0.15.1 Requires-Python <4,>=3.9; 0.15.10 Requires-Python <4,>=3.9; 0.15.11 Requires-Python <4,>=3.9; 0.15.12 Requires-Python <4,>=3.9; 0.15.13 Requires-Python <4,>=3.9; 0.15.2 Requires-Python <4,>=3.9; 0.15.3 Requires-Python <4,>=3.9; 0.15.4 Requires-Python <4,>=3.9; 0.15.5 Requires-Python <4,>=3.9; 0.15.6 Requires-Python <4,>=3.9; 0.15.7 Requires-Python <4,>=3.9; 0.15.8 Requires-Python <4,>=3.9; 0.15.9 Requires-Python <4,>=3.9; 0.16.0 Requires-Python <4,>=3.9; 0.16.1 Requires-Python <4,>=3.9; 0.16.2 Requires-Python <4,>=3.9; 0.16.3 Requires-Python <4,>=3.9; 0.16.4 Requires-Python <4,>=3.9; 0.16.5 Requires-Python <4,>=3.9; 0.16.6 Requires-Python <4,>=3.9; 0.16.7 Requires-Python <4,>=3.9; 0.16.8 Requires-Python <4,>=3.9; 0.16.9 Requires-Python <4,>=3.9; 0.17.0 Requires-Python <4,>=3.9; 0.17.1 Requires-Python <4,>=3.9; 0.17.2 Requires-Python <4,>=3.9; 0.17.3 Requires-Python <4,>=3.9; 0.17.4 Requires-Python <4,>=3.9; 0.17.5 Requires-Python <4,>=3.9; 0.17.6 Requires-Python <4,>=3.9; 0.17.7 Requires-Python <4,>=3.9; 0.18.0 Requires-Python <4,>=3.9; 0.18.1 Requires-Python <4,>=3.9; 0.18.2 Requires-Python <4,>=3.9; 0.19.0 Requires-Python <4,>=3.9; 0.20.0 Requires-Python <4,>=3.9; 0.21.0 Requires-Python 
<4,>=3.9\\x1b[0m\\x1b[31m',\n",
|
215 |
+
" '\\x1b[0m\\x1b[31mERROR: Could not find a version that satisfies the requirement traceloop-sdk==0.15.11 (from versions: 0.0.26, 0.0.27b0, 0.0.27, 0.0.28, 0.0.29, 0.0.31, 0.0.32, 0.0.33, 0.0.34, 0.0.35, 0.0.36, 0.0.37, 0.0.38, 0.0.39, 0.0.40, 0.0.41, 0.0.42, 0.0.43b0, 0.0.43b2, 0.0.43b3, 0.0.43b4, 0.0.43, 0.0.44b0, 0.0.44, 0.0.46, 0.0.47, 0.0.48, 0.0.49, 0.0.50, 0.0.51, 0.0.52, 0.0.53, 0.0.54, 0.0.55, 0.0.56a0, 0.0.56a1, 0.0.56a2, 0.0.56a3, 0.0.56a4, 0.0.56a5, 0.0.56a6, 0.0.56, 0.0.57, 0.0.58, 0.0.59, 0.0.60a0, 0.0.60a1, 0.0.60, 0.0.61a0, 0.0.61, 0.0.62, 0.0.63, 0.0.64a0, 0.0.64, 0.0.65, 0.0.66, 0.0.67, 0.0.68, 0.0.69, 0.0.70, 0.1.3, 0.1.5, 0.1.6, 0.1.7, 0.1.8, 0.1.9, 0.1.10, 0.1.11, 0.1.12, 0.2.0, 0.2.1, 0.3.0, 0.3.1, 0.3.2, 0.3.3, 0.3.4, 0.3.5, 0.3.6, 0.4.0, 0.4.1, 0.4.2, 0.5.0, 0.5.1, 0.5.2, 0.5.3, 0.6.0, 0.7.0, 0.8.0, 0.8.2, 0.9.0, 0.9.1, 0.9.2, 0.9.3, 0.9.4, 0.10.0, 0.10.1, 0.10.2, 0.10.3, 0.10.4, 0.10.5, 0.11.0, 0.11.1, 0.11.2, 0.11.3, 0.12.0, 0.12.1, 0.12.2, 0.12.3, 0.12.4, 0.12.5, 0.13.0, 0.13.1, 0.13.2, 0.13.3, 0.14.0, 0.14.1, 0.14.2)\\x1b[0m\\x1b[31m',\n",
|
216 |
+
" '\\x1b[0m\\x1b[31mERROR: No matching distribution found for traceloop-sdk==0.15.11\\x1b[0m\\x1b[31m',\n",
|
217 |
+
" '\\x1b[0m']"
|
218 |
+
]
|
219 |
+
},
|
220 |
+
"execution_count": 2,
|
221 |
+
"metadata": {},
|
222 |
+
"output_type": "execute_result"
|
223 |
+
}
|
224 |
+
],
|
225 |
+
"source": [
|
226 |
+
"!!pip3 install -r requirements.txt"
|
227 |
+
]
|
228 |
+
},
|
229 |
+
{
|
230 |
+
"cell_type": "code",
|
231 |
+
"execution_count": 28,
|
232 |
+
"metadata": {},
|
233 |
+
"outputs": [
|
234 |
+
{
|
235 |
+
"name": "stdout",
|
236 |
+
"output_type": "stream",
|
237 |
+
"text": [
|
238 |
+
"Defaulting to user installation because normal site-packages is not writeable\n",
|
239 |
+
"Collecting dateparser\n",
|
240 |
+
" Using cached dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)\n",
|
241 |
+
"Requirement already satisfied: python-dateutil in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from dateparser) (2.9.0.post0)\n",
|
242 |
+
"Requirement already satisfied: pytz in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from dateparser) (2024.1)\n",
|
243 |
+
"Collecting regex!=2019.02.19,!=2021.8.27 (from dateparser)\n",
|
244 |
+
" Downloading regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl.metadata (40 kB)\n",
|
245 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m95.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
246 |
+
"\u001b[?25hCollecting tzlocal (from dateparser)\n",
|
247 |
+
" Downloading tzlocal-5.2-py3-none-any.whl.metadata (7.8 kB)\n",
|
248 |
+
"Requirement already satisfied: six>=1.5 in /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages (from python-dateutil->dateparser) (1.15.0)\n",
|
249 |
+
"Collecting backports.zoneinfo (from tzlocal->dateparser)\n",
|
250 |
+
" Downloading backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl.metadata (4.7 kB)\n",
|
251 |
+
"Downloading dateparser-1.2.0-py2.py3-none-any.whl (294 kB)\n",
|
252 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m510.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
253 |
+
"\u001b[?25hDownloading regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl (281 kB)\n",
|
254 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.8/281.8 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
255 |
+
"\u001b[?25hDownloading tzlocal-5.2-py3-none-any.whl (17 kB)\n",
|
256 |
+
"Downloading backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl (35 kB)\n",
|
257 |
+
"Installing collected packages: regex, backports.zoneinfo, tzlocal, dateparser\n",
|
258 |
+
"\u001b[33m WARNING: The script dateparser-download is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
|
259 |
+
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
|
260 |
+
"\u001b[0mSuccessfully installed backports.zoneinfo-0.2.1 dateparser-1.2.0 regex-2024.5.15 tzlocal-5.2\n"
|
261 |
+
]
|
262 |
+
}
|
263 |
+
],
|
264 |
+
"source": [
|
265 |
+
"!pip3 install dateparser"
|
266 |
+
]
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"cell_type": "code",
|
270 |
+
"execution_count": 2,
|
271 |
+
"metadata": {},
|
272 |
+
"outputs": [
|
273 |
+
{
|
274 |
+
"name": "stdout",
|
275 |
+
"output_type": "stream",
|
276 |
+
"text": [
|
277 |
+
"Defaulting to user installation because normal site-packages is not writeable\n",
|
278 |
+
"Collecting rapidfuzz\n",
|
279 |
+
" Using cached rapidfuzz-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (11 kB)\n",
|
280 |
+
"Using cached rapidfuzz-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl (2.1 MB)\n",
|
281 |
+
"Installing collected packages: rapidfuzz\n",
|
282 |
+
"Successfully installed rapidfuzz-3.9.1\n"
|
283 |
+
]
|
284 |
+
}
|
285 |
+
],
|
286 |
+
"source": [
|
287 |
+
"!pip3 install rapidfuzz"
|
288 |
+
]
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"cell_type": "code",
|
292 |
+
"execution_count": 2,
|
293 |
"metadata": {},
|
294 |
"outputs": [
|
295 |
{
|
296 |
+
"name": "stdout",
|
297 |
+
"output_type": "stream",
|
298 |
+
"text": [
|
299 |
+
"Defaulting to user installation because normal site-packages is not writeable\n",
|
300 |
+
"Collecting dotenv\n",
|
301 |
+
" Downloading dotenv-0.0.5.tar.gz (2.4 kB)\n",
|
302 |
+
" Preparing metadata (setup.py) ... \u001b[?25lerror\n",
|
303 |
+
" \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
|
304 |
+
" \n",
|
305 |
+
" \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
|
306 |
+
" \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
|
307 |
+
" \u001b[31m╰─>\u001b[0m \u001b[31m[76 lines of output]\u001b[0m\n",
|
308 |
+
" \u001b[31m \u001b[0m /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py:80: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.\n",
|
309 |
+
" \u001b[31m \u001b[0m !!\n",
|
310 |
+
" \u001b[31m \u001b[0m \n",
|
311 |
+
" \u001b[31m \u001b[0m ********************************************************************************\n",
|
312 |
+
" \u001b[31m \u001b[0m Requirements should be satisfied by a PEP 517 installer.\n",
|
313 |
+
" \u001b[31m \u001b[0m If you are using pip, you can try `pip install --use-pep517`.\n",
|
314 |
+
" \u001b[31m \u001b[0m ********************************************************************************\n",
|
315 |
+
" \u001b[31m \u001b[0m \n",
|
316 |
+
" \u001b[31m \u001b[0m !!\n",
|
317 |
+
" \u001b[31m \u001b[0m dist.fetch_build_eggs(dist.setup_requires)\n",
|
318 |
+
" \u001b[31m \u001b[0m \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
|
319 |
+
" \u001b[31m \u001b[0m \n",
|
320 |
+
" \u001b[31m \u001b[0m \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
|
321 |
+
" \u001b[31m \u001b[0m \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
|
322 |
+
" \u001b[31m \u001b[0m \u001b[31m╰─>\u001b[0m \u001b[31m[16 lines of output]\u001b[0m\n",
|
323 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m Traceback (most recent call last):\n",
|
324 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"<string>\", line 2, in <module>\n",
|
325 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"<pip-setuptools-caller>\", line 14, in <module>\n",
|
326 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/__init__.py\", line 2, in <module>\n",
|
327 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.extension import Extension, Library\n",
|
328 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/extension.py\", line 5, in <module>\n",
|
329 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.dist import _get_unpatched\n",
|
330 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/dist.py\", line 7, in <module>\n",
|
331 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.command.install import install\n",
|
332 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/command/__init__.py\", line 8, in <module>\n",
|
333 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m from setuptools.command import install_scripts\n",
|
334 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/setuptools/command/install_scripts.py\", line 3, in <module>\n",
|
335 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m from pkg_resources import Distribution, PathMetadata, ensure_directory\n",
|
336 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-wheel-66v7g1h6/distribute_6e0b47b8750a4aa8bfff62a3f867a97f/pkg_resources.py\", line 1518, in <module>\n",
|
337 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m register_loader_type(importlib_bootstrap.SourceFileLoader, DefaultProvider)\n",
|
338 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m AttributeError: module 'importlib._bootstrap' has no attribute 'SourceFileLoader'\n",
|
339 |
+
" \u001b[31m \u001b[0m \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n",
|
340 |
+
" \u001b[31m \u001b[0m \n",
|
341 |
+
" \u001b[31m \u001b[0m \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
|
342 |
+
" \u001b[31m \u001b[0m \u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
|
343 |
+
" \u001b[31m \u001b[0m \n",
|
344 |
+
" \u001b[31m \u001b[0m \u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
|
345 |
+
" \u001b[31m \u001b[0m \u001b[31m╰─>\u001b[0m See above for output.\n",
|
346 |
+
" \u001b[31m \u001b[0m \n",
|
347 |
+
" \u001b[31m \u001b[0m \u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
|
348 |
+
" \u001b[31m \u001b[0m \u001b[1;36mhint\u001b[0m: See above for details.\n",
|
349 |
+
" \u001b[31m \u001b[0m Traceback (most recent call last):\n",
|
350 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 101, in _fetch_build_egg_no_warn\n",
|
351 |
+
" \u001b[31m \u001b[0m subprocess.check_call(cmd)\n",
|
352 |
+
" \u001b[31m \u001b[0m File \"/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/subprocess.py\", line 364, in check_call\n",
|
353 |
+
" \u001b[31m \u001b[0m raise CalledProcessError(retcode, cmd)\n",
|
354 |
+
" \u001b[31m \u001b[0m subprocess.CalledProcessError: Command '['/Applications/Xcode.app/Contents/Developer/usr/bin/python3', '-m', 'pip', '--disable-pip-version-check', 'wheel', '--no-deps', '-w', '/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/tmpnw2yadh3', '--quiet', 'distribute']' returned non-zero exit status 1.\n",
|
355 |
+
" \u001b[31m \u001b[0m \n",
|
356 |
+
" \u001b[31m \u001b[0m The above exception was the direct cause of the following exception:\n",
|
357 |
+
" \u001b[31m \u001b[0m \n",
|
358 |
+
" \u001b[31m \u001b[0m Traceback (most recent call last):\n",
|
359 |
+
" \u001b[31m \u001b[0m File \"<string>\", line 2, in <module>\n",
|
360 |
+
" \u001b[31m \u001b[0m File \"<pip-setuptools-caller>\", line 34, in <module>\n",
|
361 |
+
" \u001b[31m \u001b[0m File \"/private/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/pip-install-ac3_ml88/dotenv_7766945cfd7d4e62965375b30e9a0c21/setup.py\", line 13, in <module>\n",
|
362 |
+
" \u001b[31m \u001b[0m setup(name='dotenv',\n",
|
363 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 102, in setup\n",
|
364 |
+
" \u001b[31m \u001b[0m _install_setup_requires(attrs)\n",
|
365 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 75, in _install_setup_requires\n",
|
366 |
+
" \u001b[31m \u001b[0m _fetch_build_eggs(dist)\n",
|
367 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/__init__.py\", line 80, in _fetch_build_eggs\n",
|
368 |
+
" \u001b[31m \u001b[0m dist.fetch_build_eggs(dist.setup_requires)\n",
|
369 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/dist.py\", line 641, in fetch_build_eggs\n",
|
370 |
+
" \u001b[31m \u001b[0m return _fetch_build_eggs(self, requires)\n",
|
371 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 38, in _fetch_build_eggs\n",
|
372 |
+
" \u001b[31m \u001b[0m resolved_dists = pkg_resources.working_set.resolve(\n",
|
373 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 787, in resolve\n",
|
374 |
+
" \u001b[31m \u001b[0m dist = self._resolve_dist(\n",
|
375 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 823, in _resolve_dist\n",
|
376 |
+
" \u001b[31m \u001b[0m dist = best[req.key] = env.best_match(\n",
|
377 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 1093, in best_match\n",
|
378 |
+
" \u001b[31m \u001b[0m return self.obtain(req, installer)\n",
|
379 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/pkg_resources/__init__.py\", line 1104, in obtain\n",
|
380 |
+
" \u001b[31m \u001b[0m return installer(requirement) if installer else None\n",
|
381 |
+
" \u001b[31m \u001b[0m File \"/Users/patrickalexis/Library/Python/3.8/lib/python/site-packages/setuptools/installer.py\", line 103, in _fetch_build_egg_no_warn\n",
|
382 |
+
" \u001b[31m \u001b[0m raise DistutilsError(str(e)) from e\n",
|
383 |
+
" \u001b[31m \u001b[0m distutils.errors.DistutilsError: Command '['/Applications/Xcode.app/Contents/Developer/usr/bin/python3', '-m', 'pip', '--disable-pip-version-check', 'wheel', '--no-deps', '-w', '/var/folders/g9/flbt409n1b7g0ry4b9ffdvnc0000gn/T/tmpnw2yadh3', '--quiet', 'distribute']' returned non-zero exit status 1.\n",
|
384 |
+
" \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n",
|
385 |
+
" \n",
|
386 |
+
" \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
|
387 |
+
"\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
|
388 |
+
"\n",
|
389 |
+
"\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
|
390 |
+
"\u001b[31m╰─>\u001b[0m See above for output.\n",
|
391 |
+
"\n",
|
392 |
+
"\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
|
393 |
+
"\u001b[1;36mhint\u001b[0m: See above for details.\n"
|
394 |
+
]
|
395 |
+
}
|
396 |
+
],
|
397 |
+
"source": [
|
398 |
+
"!pip3 install dotenv"
|
399 |
+
]
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"cell_type": "code",
|
403 |
+
"execution_count": 10,
|
404 |
+
"metadata": {},
|
405 |
+
"outputs": [],
|
406 |
+
"source": [
|
407 |
+
"# Create data folder\n",
|
408 |
+
"!mkdir -p data/tx_data/output\n",
|
409 |
+
"!mkdir -p data/ref_data"
|
410 |
+
]
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"cell_type": "code",
|
414 |
+
"execution_count": 4,
|
415 |
+
"metadata": {},
|
416 |
+
"outputs": [
|
417 |
+
{
|
418 |
+
"name": "stdout",
|
419 |
+
"output_type": "stream",
|
420 |
+
"text": [
|
421 |
+
"Defaulting to user installation because normal site-packages is not writeable\n",
|
422 |
+
"Collecting openai\n",
|
423 |
+
" Using cached openai-1.30.4-py3-none-any.whl.metadata (21 kB)\n",
|
424 |
+
"Collecting anyio<5,>=3.5.0 (from openai)\n",
|
425 |
+
" Downloading anyio-4.4.0-py3-none-any.whl.metadata (4.6 kB)\n",
|
426 |
+
"Collecting distro<2,>=1.7.0 (from openai)\n",
|
427 |
+
" Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
|
428 |
+
"Collecting httpx<1,>=0.23.0 (from openai)\n",
|
429 |
+
" Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)\n",
|
430 |
+
"Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (2.7.1)\n",
|
431 |
+
"Collecting sniffio (from openai)\n",
|
432 |
+
" Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
|
433 |
+
"Requirement already satisfied: tqdm>4 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (4.66.4)\n",
|
434 |
+
"Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from openai) (4.12.0)\n",
|
435 |
+
"Requirement already satisfied: idna>=2.8 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
|
436 |
+
"Collecting exceptiongroup>=1.0.2 (from anyio<5,>=3.5.0->openai)\n",
|
437 |
+
" Downloading exceptiongroup-1.2.1-py3-none-any.whl.metadata (6.6 kB)\n",
|
438 |
+
"Requirement already satisfied: certifi in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\n",
|
439 |
+
"Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n",
|
440 |
+
" Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n",
|
441 |
+
"Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n",
|
442 |
+
" Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
|
443 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
|
444 |
+
"Requirement already satisfied: pydantic-core==2.18.2 in /Users/patrickalexis/Library/Python/3.8/lib/python/site-packages (from pydantic<3,>=1.9.0->openai) (2.18.2)\n",
|
445 |
+
"Downloading openai-1.30.4-py3-none-any.whl (320 kB)\n",
|
446 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
447 |
+
"\u001b[?25hDownloading anyio-4.4.0-py3-none-any.whl (86 kB)\n",
|
448 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
449 |
+
"\u001b[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
|
450 |
+
"Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
|
451 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
452 |
+
"\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
|
453 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
454 |
+
"\u001b[?25hDownloading sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
|
455 |
+
"Downloading exceptiongroup-1.2.1-py3-none-any.whl (16 kB)\n",
|
456 |
+
"Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
|
457 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
458 |
+
"\u001b[?25hInstalling collected packages: sniffio, h11, exceptiongroup, distro, httpcore, anyio, httpx, openai\n",
|
459 |
+
"\u001b[33m WARNING: The script distro is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
|
460 |
+
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
|
461 |
+
"\u001b[0m\u001b[33m WARNING: The script httpx is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
|
462 |
+
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
|
463 |
+
"\u001b[0m\u001b[33m WARNING: The script openai is installed in '/Users/patrickalexis/Library/Python/3.8/bin' which is not on PATH.\n",
|
464 |
+
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
|
465 |
+
"\u001b[0mSuccessfully installed anyio-4.4.0 distro-1.9.0 exceptiongroup-1.2.1 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 openai-1.30.4 sniffio-1.3.1\n"
|
466 |
+
]
|
467 |
+
}
|
468 |
+
],
|
469 |
+
"source": [
|
470 |
+
"!pip3 install openai"
|
471 |
+
]
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"cell_type": "code",
|
475 |
+
"execution_count": 5,
|
476 |
+
"metadata": {},
|
477 |
+
"outputs": [
|
478 |
+
{
|
479 |
+
"name": "stdout",
|
480 |
+
"output_type": "stream",
|
481 |
+
"text": [
|
482 |
+
"\n",
|
483 |
+
"Processing file\n"
|
484 |
+
]
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"name": "stderr",
|
488 |
+
"output_type": "stream",
|
489 |
+
"text": [
|
490 |
+
"ERROR:root:| File: transactions_2024.csv | Unexpected Error: expected str, got module\n"
|
491 |
+
]
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"name": "stdout",
|
495 |
+
"output_type": "stream",
|
496 |
+
"text": [
|
497 |
+
"ERROR processing file transactions_2024.csv: expected str, got module\n",
|
498 |
+
"\n",
|
499 |
+
"Processed 1 files: 0 successful, 1 with errors\n",
|
500 |
+
"\n",
|
501 |
+
"Errors in the following files:\n",
|
502 |
+
" transactions_2024.csv: expected str, got module\n",
|
503 |
+
"\n",
|
504 |
+
"\n",
|
505 |
+
"[{'file_name': 'transactions_2024.csv', 'output': Empty DataFrame\n",
|
506 |
+
"Columns: []\n",
|
507 |
+
"Index: [], 'error': 'expected str, got module'}]\n",
|
508 |
+
"['name/description,category\\n']\n",
|
509 |
+
"None\n"
|
510 |
]
|
511 |
}
|
512 |
],
|
513 |
"source": [
|
514 |
"# Process the transactions csv and get the results from the categorization llm utility\n",
|
515 |
+
"from app.categorization.file_processing import process_file, save_results\n",
|
516 |
+
"from app.categorization.config import CATEGORY_REFERENCE_OUTPUT_FILE\n",
|
517 |
+
"# from dotenv import load_dotenv\n",
|
518 |
"import asyncio\n",
|
519 |
+
"import os.path\n",
|
520 |
+
"\n",
|
521 |
+
"# load_dotenv()\n",
|
522 |
"\n",
|
523 |
+
"# relative_dir = os.path.dirname(__file__)\n",
|
524 |
+
"# abs_file_path = os.path.join(relative_dir, CATEGORY_REFERENCE_OUTPUT_FILE)\n",
|
525 |
"\n",
|
526 |
"async def apply_categorization():\n",
|
527 |
+
" processed_file = process_file(\"app/transactions_rag/transactions_2024.csv\")\n",
|
528 |
"\n",
|
529 |
" print(\"\\nProcessing file\")\n",
|
530 |
" result = await asyncio.gather(processed_file)\n",
|
531 |
"\n",
|
532 |
+
" save_results(result)\n",
|
533 |
+
" print(result)\n",
|
534 |
+
"\n",
|
535 |
+
" output_file = open(CATEGORY_REFERENCE_OUTPUT_FILE, \"r+\")\n",
|
536 |
+
" print(output_file.readlines())\n",
|
537 |
+
"\n",
|
538 |
"\n",
|
539 |
+
"result = await apply_categorization()\n",
|
540 |
+
"print(result)\n",
|
541 |
+
"\n"
|
542 |
]
|
543 |
}
|
544 |
],
|
requirements.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
aiostream==0.5.2
|
2 |
cachetools==5.3.3
|
3 |
docx2txt==0.8
|
@@ -8,14 +19,4 @@ llama-index-vector-stores-pinecone==0.1.3
|
|
8 |
llama-index==0.10.28
|
9 |
python-dotenv==1.0.0
|
10 |
traceloop-sdk==0.15.11
|
11 |
-
uvicorn==0.23.2
|
12 |
-
|
13 |
-
langchain
|
14 |
-
python-dotenv
|
15 |
-
openai
|
16 |
-
tenacity
|
17 |
-
rapidfuzz
|
18 |
-
pydantic
|
19 |
-
dateparser
|
20 |
-
pandas
|
21 |
-
path
|
|
|
1 |
+
langchain
|
2 |
+
langchain_openai
|
3 |
+
python-dotenv
|
4 |
+
openai
|
5 |
+
tenacity
|
6 |
+
rapidfuzz
|
7 |
+
pydantic
|
8 |
+
dateparser
|
9 |
+
pandas
|
10 |
+
path
|
11 |
+
|
12 |
aiostream==0.5.2
|
13 |
cachetools==5.3.3
|
14 |
docx2txt==0.8
|
|
|
19 |
llama-index==0.10.28
|
20 |
python-dotenv==1.0.0
|
21 |
traceloop-sdk==0.15.11
|
22 |
+
uvicorn==0.23.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|