Praneeth Yerrapragada committed on
Commit
5112690
1 Parent(s): ea9333f

chore: remove open_ai_apikey in plain text

Browse files
Files changed (1) hide show
  1. app/categorization/categorizer.py +16 -12
app/categorization/categorizer.py CHANGED
@@ -46,12 +46,13 @@ def fuzzy_match_list_categorizer(
46
  """
47
 
48
  # Fuzzy-match this description against the reference descriptions
49
- match_results = process.extractOne(description, descriptions, score_cutoff=threshold)
 
50
 
51
  # If a match is found, return the category of the matched description
52
  if match_results:
53
  return description_category_pairs.at[match_results[2], 'category']
54
-
55
  return None
56
 
57
 
@@ -69,10 +70,10 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
69
  """
70
 
71
  # Initialize language model and prompt
72
- # openai_api_key = os.environ['OPENAI_API_KEY']
73
- # print("apikey: " + openai_api_key)
74
- llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125", api_key="sk-codepath-group-project-3WwlG0qG8GDG7SMVjgFLT3BlbkFJOHIlVsq0UXFqaOV7hl3O")
75
- prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
76
  chain = LLMChain(llm=llm, prompt=prompt)
77
 
78
  # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
@@ -87,7 +88,8 @@ async def llm_list_categorizer(tx_list: pd.DataFrame) -> pd.DataFrame:
87
  valid_results = [result['output'] for result in results if result['valid']]
88
 
89
  # Flatten the list of valid results to obtain a single list of description-category pairs
90
- valid_outputs = [output for valid_result in valid_results for output in valid_result]
 
91
 
92
  # Return a DataFrame with the valid outputs
93
  return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
@@ -120,7 +122,7 @@ async def llm_sublist_categorizer(
120
  try:
121
  # Create a pattern to match a list Description-Category pairs (List[Tuple[str, str]])
122
  pattern = r"\['([^']+)', '([^']+)'\]"
123
-
124
  # Use it to extract all the correctly formatted pairs from the raw result
125
  matches = re.findall(pattern, raw_result.replace("\\'", "'"))
126
 
@@ -131,13 +133,15 @@ async def llm_sublist_categorizer(
131
  parsed_pair = ast.literal_eval(str(list(match)))
132
  valid_outputs.append(parsed_pair)
133
  except Exception as e:
134
- logger.log(logging.ERROR, f"Parsing Error: {e}\nMatch: {match}\n")
 
135
  result['valid'] = False
136
 
137
  result['output'] = valid_outputs
138
 
139
  except Exception as e:
140
- logging.log(logging.ERROR, f"| File: {file_name} | Unexpected Error: {e}\nRaw Result: {raw_result}")
 
141
  result['valid'] = False
142
-
143
- return result
 
46
  """
47
 
48
  # Fuzzy-match this description against the reference descriptions
49
+ match_results = process.extractOne(
50
+ description, descriptions, score_cutoff=threshold)
51
 
52
  # If a match is found, return the category of the matched description
53
  if match_results:
54
  return description_category_pairs.at[match_results[2], 'category']
55
+
56
  return None
57
 
58
 
 
70
  """
71
 
72
  # Initialize language model and prompt
73
+ openai_api_key = os.environ['OPENAI_API_KEY']
74
+ llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125",
75
+ api_key=openai_api_key)
76
+ prompt = PromptTemplate.from_template(template=CATEGORY_TEMPLATE)
77
  chain = LLMChain(llm=llm, prompt=prompt)
78
 
79
  # Iterate over the DataFrame in batches of TX_PER_LLM_RUN transactions
 
88
  valid_results = [result['output'] for result in results if result['valid']]
89
 
90
  # Flatten the list of valid results to obtain a single list of description-category pairs
91
+ valid_outputs = [
92
+ output for valid_result in valid_results for output in valid_result]
93
 
94
  # Return a DataFrame with the valid outputs
95
  return pd.DataFrame(valid_outputs, columns=['name/description', 'category'])
 
122
  try:
123
  # Create a pattern to match a list Description-Category pairs (List[Tuple[str, str]])
124
  pattern = r"\['([^']+)', '([^']+)'\]"
125
+
126
  # Use it to extract all the correctly formatted pairs from the raw result
127
  matches = re.findall(pattern, raw_result.replace("\\'", "'"))
128
 
 
133
  parsed_pair = ast.literal_eval(str(list(match)))
134
  valid_outputs.append(parsed_pair)
135
  except Exception as e:
136
+ logger.log(logging.ERROR,
137
+ f"Parsing Error: {e}\nMatch: {match}\n")
138
  result['valid'] = False
139
 
140
  result['output'] = valid_outputs
141
 
142
  except Exception as e:
143
+ logging.log(
144
+ logging.ERROR, f"| File: {file_name} | Unexpected Error: {e}\nRaw Result: {raw_result}")
145
  result['valid'] = False
146
+
147
+ return result