MetaCLIP / metaclip /substr_matching.py
Hu Xu
Add application file
b94cb82
raw history blame
No virus
909 Bytes
# Copyright (c) Meta Platforms, Inc. and affiliates
# Module-level cache of the space-padded metadata entries. It stays None until
# substr_matching() populates it, so the padding work is not repeated per call.
spaced_metadata = None
def spacing(text):
puncts_to_wrap = [",", ".", ";", ":", "?", "!", "`"]
chars_to_space = ["\t", "\n", "\r"]
spaced_text = f" {text} "
for punct_to_wrap in puncts_to_wrap:
spaced_text = spaced_text.replace(
punct_to_wrap, f" {punct_to_wrap} "
)
for char_to_space in chars_to_space:
spaced_text = spaced_text.replace(char_to_space, " ")
return spaced_text
# The ``metadata`` object that ``spaced_metadata`` was last built from; used to
# detect when a caller switches to a different metadata collection.
_spaced_metadata_source = None


def substr_matching(text, metadata):
    """Return the indices of the metadata entries that occur in *text*.

    Each entry is padded with one space on both sides and searched for in the
    output of ``spacing(text)``, so an entry only matches where it is
    delimited by spaces in the spaced text.

    The padded entries are cached in the module-level ``spaced_metadata``.
    The cache is rebuilt whenever a different ``metadata`` object is passed;
    previously it was built once from the first call's metadata and silently
    reused for every later call, whatever was passed.

    Args:
        text: The text to search in.
        metadata: Iterable of entry strings. May be ``None`` only when the
            cache has already been populated, in which case it is reused.

    Returns:
        A list of positions (in ``metadata`` order) of the matching entries.
    """
    global spaced_metadata, _spaced_metadata_source
    # Compare by identity: it is O(1), whereas comparing contents would cost
    # as much as rebuilding the cache on every call.
    switched = (
        metadata is not None and metadata is not _spaced_metadata_source
    )
    if spaced_metadata is None or switched:
        spaced_metadata = [f" {entry} " for entry in metadata]
        _spaced_metadata_source = metadata

    spaced_text = spacing(text)
    return [
        entry_id
        for entry_id, entry in enumerate(spaced_metadata)
        if entry in spaced_text
    ]