abhisheky127 committed on
Commit
620ca5b
1 Parent(s): ae9e0c1

updating new preprocess

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -20,12 +20,29 @@ def zero_shot(doc, candidates):
20
  return dict(zip(labels, scores))
21
 
22
  #define a function to preprocess transaction query
 
 
 
 
 
 
 
23
  def preprocess(transaction):
24
- pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
25
- match = re.search(pattern, transaction)
26
- if match:
27
- return match.group(1).strip()
28
- return None
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  #create input and output objects
 
20
  return dict(zip(labels, scores))
21
 
22
  #define a function to preprocess transaction query
23
+ # def preprocess(transaction):
24
+ # pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
25
+ # match = re.search(pattern, transaction)
26
+ # if match:
27
+ # return match.group(1).strip()
28
+ # return None
29
+
30
  def preprocess(transaction):
31
+ remove_words = ["pos", "mps", "bil", "onl"]
32
+
33
+ # Convert to lowercase
34
+ transaction = transaction.lower()
35
+
36
+ # Remove unwanted words
37
+ for word in remove_words:
38
+ transaction = transaction.replace(word, "")
39
+
40
+ # Remove special characters and digits
41
+ transaction = re.sub(r"[^a-z\s]+", "", transaction)
42
+
43
+ # Remove extra spaces
44
+ transaction = re.sub(r"\s+", " ", transaction).strip()
45
+ return transaction
46
 
47
 
48
  #create input and output objects