import re

from nltk.corpus import stopwords


def _clean_text(text, stop_words):
    """Lower-case *text*, strip punctuation and drop stop words.

    Every character that is neither a word character nor whitespace is
    removed, the result is split on whitespace, and words found in
    *stop_words* are discarded. The surviving words are returned joined by
    single spaces.
    """
    text = re.sub(r'[^\w\s]', '', text.lower())
    return " ".join(word for word in text.split() if word not in stop_words)


def find_common_subsequences(sentence, str_list, max_n=5):
    """Find word n-grams of *sentence* that occur in every string of *str_list*.

    Both the sentence and the candidate strings are normalised first
    (lower-cased, punctuation stripped, English stop words removed), so an
    n-gram is a run of words that are adjacent *after* stop-word removal.
    Longer n-grams are tried first; a shorter n-gram is skipped when it
    already occurs, on word boundaries, inside a phrase that was kept.

    Args:
        sentence: The reference sentence whose n-grams are examined.
        str_list: Strings that must all contain an n-gram for it to count.
        max_n: Longest n-gram length (in words) to consider. Defaults to 5.

    Returns:
        A list of ``(rank, phrase)`` tuples. ``rank`` starts at 1 and follows
        the order in which the phrases first appear in the normalised
        sentence. An empty list is returned when *str_list* is empty, since
        there is nothing for a phrase to be common with.
    """
    # all() over an empty iterable is True, which would otherwise report
    # every n-gram of the sentence as "common".
    if not str_list:
        return []

    stop_words = set(stopwords.words('english'))
    words = _clean_text(sentence, stop_words).split()
    cleaned_strings = [_clean_text(s, stop_words) for s in str_list]

    common_grams = []
    added_phrases = []

    for n in range(max_n, 0, -1):
        for i in range(len(words) - n + 1):
            subseq = " ".join(words[i:i + n])
            pattern = re.compile(r'\b' + re.escape(subseq) + r'\b')

            # Match on word boundaries so that e.g. "star" is not treated as
            # already covered by the kept phrase "pop stars".
            if any(pattern.search(phrase) for phrase in added_phrases):
                continue

            if all(pattern.search(s) for s in cleaned_strings):
                common_grams.append((i, subseq))
                added_phrases.append(subseq)

    # Sort by the first appearance in the normalised sentence
    common_grams.sort(key=lambda item: item[0])

    # Assign 1-based indices based on the sorted order
    return [
        (rank, subseq)
        for rank, (_, subseq) in enumerate(common_grams, start=1)
    ]


# Example usage
sentence = "Billie Eilish, Charli XCX and Lorde are among a group of young female pop stars who are revealing, in their music, the pressure they have felt to look thin in a time of especially punishing beauty standards."
# Candidate paraphrases that are compared against the reference sentence.
# Each literal is kept on a single line: a raw line break inside a
# single-quoted string is a syntax error.
str_list = [
    'Young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
    'A group of young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
    'The music of a group of young female pop stars, such as Billie Eilish, Charli XCX and Lorde, is revealing the pressure to appear slim in an age where beauty is highly regulated.',
    'Through their songs, young female pop stars like Billie Eilish, Charli XCX and Lordé reveal the pressure they have been subjected to in order to appear attractive in an age of strict beauty standards.',
    'A number of female pop stars, including Billie Eilish, Charli XCx, and Lorde, are using their music to showcase the pressure they have experienced in order not to look unappealing in an age where beauty is highly valued.',
    "Some young female pop stars, such as Billie Eilish from the R&B Music Hall of Fame in Las Vegas and Charli XCX from Lorde from Manchester's Outer Banks, are using their music to showcase how they have been subjected to harsh beauty standards",
    'Among the group of young female pop stars who are using their music to showcase their unappealing appearance, are Billié Eilish and Charli XCX while Lorde is currently struggling to maintain her attractive looks.',
    'Young female pop icons such as Billie Eilish, Charliile XCX and Lorde are using their music to showcase the pressure they have been subjected too harshly to look attractive in an age where beauty is highly regulated.',
    'Billie Eilish, Charliile XCX and Lordé are just some of the young female pop stars who have been exposing their looks to music in an age where beauty standards are particularly hard to find.',
    'In the music industry, young female pop stars like Billie Eilish and Charliile XCx (with an average height of 160cm), as well as Lorde, are displaying how they have been subjected to harsh beauty standards.',
]

# Print the ranked phrases shared by the sentence and every paraphrase.
print(find_common_subsequences(sentence, str_list))