Spaces:
Running
Running
import re | |
# A <response> or <clarification> element. The backreference forces the closing
# tag to match the opening one; DOTALL lets the content span several lines.
_TAG_PATTERN = re.compile(r'<(response|clarification)>(.*?)</\1>', re.DOTALL)

# A hyphen acting as a list bullet: at the very start of the text or directly
# after whitespace. Hyphens inside a word ("Well-being") are not bullets.
_BULLET_PATTERN = re.compile(r'(?<!\S)-')


def _parse_clarification(clarification_text):
    """Return the question dicts found in the body of one clarification tag."""
    questions = []
    # Whatever precedes the first "- text:" marker (for example a
    # "questions:" header) is not a question, hence the [1:].
    for block in clarification_text.split("- text:")[1:]:
        question, marker, options_text = block.partition("options:")
        if not marker:
            # An entry without an "options:" section is skipped.
            continue
        options = [
            option.strip()
            for option in _BULLET_PATTERN.split(options_text)
            if option.strip()
        ]
        questions.append({'question': question.strip(), 'options': options})
    return questions


def parse_text(input_text):
    """Split tagged text into a combined response and clarification questions.

    The text is scanned once, in document order, for ``<response>...</response>``
    and ``<clarification>...</clarification>`` elements:

    * free text outside the tags and the content of every response tag are
      stripped and concatenated, one segment per line;
    * the content of every clarification tag is parsed into questions and is
      not copied into the combined response.

    Each segment contributes its own newline even when it is empty, so tags
    with nothing between them can produce blank lines in the result.

    Args:
        input_text: The raw text to parse.

    Returns:
        A tuple ``(combined_response, parsed_clarifications)`` where
        ``combined_response`` is a string and ``parsed_clarifications`` is a
        list of ``{'question': str, 'options': list of str}`` dicts.
    """
    pieces = []
    parsed_clarifications = []
    last_end = 0

    for tag_match in _TAG_PATTERN.finditer(input_text):
        # Free text between the previous tag (or the start) and this tag.
        pieces.append(input_text[last_end:tag_match.start()].strip() + "\n")

        content = tag_match.group(2).strip()
        if tag_match.group(1) == "response":
            pieces.append(content + "\n")
        elif content:
            parsed_clarifications.extend(_parse_clarification(content))

        last_end = tag_match.end()

    # Whatever follows the last tag (the whole input when there are no tags).
    pieces.append(input_text[last_end:].strip())
    return "".join(pieces).strip(), parsed_clarifications
# Example usage: parse a sample message and print what was extracted.
input_text = """
Some introductory text that should be included in the response.
<response>response to previous question is provided here</response>
Some more text that should also be included in the response.
<clarification>
questions:
- text: What topic should the article cover?
options:
- Technology
- Health and Wellness
- Travel
- Other
- text: What is the target audience for the article?
options:
- General public
- Professionals in a specific field
- Students
- Other
</clarification>
Final notes that should be part of the response.
"""
parsed_data = parse_text(input_text)
# parse_text returns a (response, clarifications) tuple, not a dict, so it is
# unpacked here; indexing it with string keys raises TypeError.
response_text, clarifications = parsed_data
print(f"Response: {response_text}")
print("Clarifications:")
for item in clarifications:
    print(f" Question: {item['question']}")
    print(" Options:", ", ".join(item['options']))