Spaces:
Running
Running
File size: 3,561 Bytes
2d3888b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import re
def _parse_clarification_questions(clarification_text):
    """Extract question/option entries from the body of a clarification tag.

    The body is expected to hold entries shaped like::

        - text: <question>
        options:
        - <option>
        - <option>

    Args:
        clarification_text: Text found between ``<clarification>`` and
            ``</clarification>``.

    Returns:
        A list of ``{'question': str, 'options': list[str]}`` dicts, one per
        ``- text:`` entry that also contains an ``options:`` marker.
    """
    questions = []
    # Everything before the first "- text:" is a header (e.g. "questions:").
    for block in clarification_text.split("- text:")[1:]:
        question, marker, options_text = block.partition("options:")
        if not marker:
            # An entry without an "options:" section is skipped.
            continue
        # Split only on list markers that start a line, so hyphens inside an
        # option ("Follow-up", "E-mail") stay part of that option.
        options = [
            option.strip()
            for option in re.split(r'^\s*-\s*', options_text, flags=re.MULTILINE)
            if option.strip()
        ]
        questions.append({'question': question.strip(), 'options': options})
    return questions


def parse_text(input_text):
    """Split tagged text into plain response text and clarification questions.

    ``<response>...</response>`` tags are unwrapped and their content is kept
    in the response text. ``<clarification>...</clarification>`` tags are
    removed from the response text and their questions are parsed. Text
    outside of any tag is kept. Tags are handled in the order they appear in
    ``input_text``, whatever mix of responses and clarifications it contains.

    Args:
        input_text: The raw text to parse.

    Returns:
        A ``(combined_response, parsed_clarifications)`` tuple.
        ``combined_response`` is the newline-joined, stripped text segments;
        ``parsed_clarifications`` is a list of
        ``{'question': str, 'options': list[str]}`` dicts.
    """
    # One pattern for both tags keeps the matches in document order; the
    # backreference makes sure the closing tag matches the opening one.
    tag_pattern = re.compile(r'<(response|clarification)>(.*?)</\1>', re.DOTALL)

    segments = []
    parsed_clarifications = []
    last_end = 0

    for tag_match in tag_pattern.finditer(input_text):
        # Text between the previous tag (or the start) and this tag.
        segments.append(input_text[last_end:tag_match.start()].strip())

        tag_name = tag_match.group(1)
        tag_body = tag_match.group(2).strip()
        if tag_name == 'response':
            segments.append(tag_body)
        else:
            parsed_clarifications.extend(_parse_clarification_questions(tag_body))

        last_end = tag_match.end()

    # Whatever follows the last tag (or the whole input if there were none).
    segments.append(input_text[last_end:].strip())

    return "\n".join(segments).strip(), parsed_clarifications
# Example usage: runs only when this file is executed directly, so importing
# the module does not print the demo output.
if __name__ == "__main__":
    input_text = """
Some introductory text that should be included in the response.
<response>response to previous question is provided here</response>
Some more text that should also be included in the response.
<clarification>
questions:
- text: What topic should the article cover?
options:
- Technology
- Health and Wellness
- Travel
- Other
- text: What is the target audience for the article?
options:
- General public
- Professionals in a specific field
- Students
- Other
</clarification>
Final notes that should be part of the response.
"""
    # parse_text returns a (response_text, clarifications) tuple, not a dict,
    # so unpack it instead of indexing by string keys.
    response_text, clarifications = parse_text(input_text)
    print(f"Response: {response_text}")
    print("Clarifications:")
    for item in clarifications:
        print(f" Question: {item['question']}")
        print(" Options:", ", ".join(item['options']))
|