ENGLISH_ASR / processDoubles.py
cdactvm's picture
Upload 6 files
3b58a97 verified
raw
history blame
2.47 kB
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# # Function to process "double" followed by a number
# def process_doubles(sentence):
# tokens = sentence.split()
# result = []
# i = 0
# while i < len(tokens):
# if tokens[i] == "double":
# if i + 1 < len(tokens):
# result.append(tokens[i + 1])
# result.append(tokens[i + 1])
# i += 2
# else:
# result.append(tokens[i])
# i += 1
# else:
# result.append(tokens[i])
# i += 1
# return ' '.join(result)
# In[ ]:
# import re
# def process_doubles(sentence):
# # Use regex to split 'डबल' followed by numbers/words without space (e.g., "डबलवन" -> "डबल वन")
# sentence = re.sub(r'(डबल)(\S+)', r'\1 \2', sentence)
# tokens = sentence.split()
# result = []
# i = 0
# while i < len(tokens):
# if tokens[i] == "डबल":
# if i + 1 < len(tokens):
# result.append(tokens[i + 1]) # Append the next word/number
# result.append(tokens[i + 1]) # Append the next word/number again to duplicate
# i += 2 # Skip over the next word since it's already processed
# else:
# result.append(tokens[i])
# i += 1
# else:
# result.append(tokens[i])
# i += 1
# return ' '.join(result)
# In[2]:
# Expand "double"/"triple": the token that follows the marker (a number word,
# digit, or any other token) is emitted two or three times and the marker is dropped.
def process_multiples(sentence: str) -> str:
    """Expand spoken "double"/"triple" markers into repeated tokens.

    The sentence is split on whitespace. Whenever the token ``"double"`` or
    ``"triple"`` is followed by another token, the marker is dropped and the
    following token is emitted two or three times respectively. A marker
    that is the last token has nothing to repeat and is kept unchanged.
    Matching is exact (case-sensitive) on whole tokens.

    Args:
        sentence: Text to process.

    Returns:
        The processed tokens joined with single spaces; runs of whitespace
        in the input are therefore collapsed and leading/trailing
        whitespace is removed.

    >>> process_multiples("double five triple nine")
    'five five nine nine nine'
    """
    # Marker word -> how many copies of the following token to emit.
    repeat_counts = {"double": 2, "triple": 3}

    tokens = sentence.split()
    result = []
    i = 0
    while i < len(tokens):
        count = repeat_counts.get(tokens[i])
        if count is not None and i + 1 < len(tokens):
            # Consume the marker and the token after it in one step, so the
            # repeated token is never itself re-examined as a marker.
            result.extend([tokens[i + 1]] * count)
            i += 2
        else:
            # Ordinary token, or a marker with nothing after it.
            result.append(tokens[i])
            i += 1
    return ' '.join(result)
# In[ ]: