Spaces:
Sleeping
Sleeping
| # from __future__ import annotations | |
| # import re | |
| # FLUFF_LINES = { | |
| # "i hope this helps", | |
| # "hope this helps", | |
| # "let me know if you need anything else", | |
| # "thanks", | |
| # } | |
| # def extract_final_answer(text: str) -> str: | |
| # if text is None: | |
| # return "" | |
| # text = str(text).strip() | |
| # if not text: | |
| # return "" | |
| # text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text) | |
| # text = re.sub(r"\s*```$", "", text) | |
| # # Strong preference: explicit final-answer style markers | |
| # explicit_patterns = [ | |
| # r"(?is)\bfinal answer\s*:\s*(.+)$", | |
| # r"(?is)\banswer\s*:\s*(.+)$", | |
| # r"(?is)\bthe answer is\s*:\s*(.+)$", | |
| # r"(?is)\bthe answer is\s+(.+)$", | |
| # ] | |
| # for pattern in explicit_patterns: | |
| # match = re.search(pattern, text) | |
| # if match: | |
| # candidate = match.group(1).strip() | |
| # candidate_lines = [line.strip() for line in candidate.splitlines() if line.strip()] | |
| # if candidate_lines: | |
| # return candidate_lines[0] | |
| # lines = [line.strip() for line in text.splitlines() if line.strip()] | |
| # if not lines: | |
| # return "" | |
| # # Prefer short non-fluff lines near the end | |
| # for line in reversed(lines): | |
| # normalized = normalize_basic_answer(line).lower() | |
| # if normalized and normalized not in FLUFF_LINES and len(normalized) <= 200: | |
| # return line | |
| # return lines[-1] | |
| # def normalize_basic_answer(text: str) -> str: | |
| # if text is None: | |
| # return "" | |
| # text = str(text).strip() | |
| # if not text: | |
| # return "" | |
| # text = re.sub(r"\s+", " ", text).strip() | |
| # text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip() | |
| # if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}: | |
| # text = text[1:-1].strip() | |
| # if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text): | |
| # text = text[:-1].strip() | |
| # return text | |
| # def normalize_final_answer(question: str, text: str) -> str: | |
| # text = normalize_basic_answer(text) | |
| # if not text: | |
| # return "" | |
| # q = question.lower() | |
| # # first name only | |
| # if "give only the first name" in q or "first name only" in q: | |
| # text = re.split(r"\s+", text.strip())[0] | |
| # # last name only | |
| # if "last names only" in q or "use their last names only" in q: | |
| # parts = [part.strip() for part in text.split(",")] | |
| # cleaned_parts = [] | |
| # for part in parts: | |
| # tokens = part.split() | |
| # cleaned_parts.append(tokens[-1] if tokens else part) | |
| # text = ", ".join(cleaned_parts) | |
| # # city only | |
| # if "just give me the city name" in q or "city name without abbreviations" in q: | |
| # text = re.split(r"[,;()\-]", text)[0].strip() | |
| # # comma-delimited / comma separated list | |
| # if "comma separated list" in q or "comma-delimited list" in q or "comma delimited list" in q: | |
| # parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()] | |
| # text = ",".join(parts) | |
| # # ascending order / alphabetical | |
| # if "ascending order" in q: | |
| # try: | |
| # nums = [int(x.strip()) for x in text.split(",") if x.strip()] | |
| # text = ",".join(str(n) for n in sorted(nums)) | |
| # except Exception: | |
| # pass | |
| # if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q: | |
| # parts = [p.strip() for p in text.split(",") if p.strip()] | |
| # if parts: | |
| # text = ",".join(sorted(parts, key=lambda x: x.lower())) | |
| # # two decimal places | |
| # if "two decimal places" in q: | |
| # number_match = re.search(r"-?\d+(?:\.\d+)?", text.replace(",", "")) | |
| # if number_match: | |
| # try: | |
| # value = float(number_match.group(0)) | |
| # text = f"{value:.2f}" | |
| # except Exception: | |
| # pass | |
| # # IOC code / abbreviations / codes often expected uppercase single token | |
| # if "ioc country code" in q: | |
| # text = text.strip().upper() | |
| # # algebraic notation answer should be just one move token-like string | |
| # if "algebraic notation" in q: | |
| # text = text.strip().split()[0] | |
| # return text | |
| # def is_placeholder_answer(text: str) -> bool: | |
| # normalized = normalize_basic_answer(text).lower() | |
| # return normalized in {"", "placeholder", "n/a", "unknown"} | |
| from __future__ import annotations | |
| import re | |
# Closing pleasantries that are never the answer themselves. Entries are
# lowercase with no trailing period, and are compared against a whole
# normalized, lower-cased line.
_FLUFF_LINES = {
    "thanks",
    "thank you",
    "hope this helps",
    "i hope this helps",
    "let me know if you need anything else",
}
def extract_final_answer(text: str) -> str:
    """Pull the most likely final answer out of raw model output.

    Steps:
    - ``None`` or blank input gives ``""``.
    - A leading code fence (with optional language tag) and a trailing
      fence are removed.
    - The explicit markers "final answer:", "answer:", "the answer is:" and
      "the answer is" are tried in that order (case-insensitive); for the
      first one found, the first non-blank line after it is returned.
    - Otherwise the non-blank lines are scanned from the end, and the first
      one that is non-empty after basic normalization, is not a known
      pleasantry, and is at most 200 characters long is returned.
    - If no line qualifies, the last non-blank line is returned.
    """
    if text is None:
        return ""

    body = str(text).strip()
    if not body:
        return ""

    # Unwrap a surrounding ```lang ... ``` fence, if there is one.
    body = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", body)
    body = re.sub(r"\s*```$", "", body)

    # Ordered from most to least specific; the first marker that hits wins.
    marker_patterns = (
        r"(?is)\bfinal answer\s*:\s*(.+)$",
        r"(?is)\banswer\s*:\s*(.+)$",
        r"(?is)\bthe answer is\s*:\s*(.+)$",
        r"(?is)\bthe answer is\s+(.+)$",
    )
    for marker in marker_patterns:
        found = re.search(marker, body)
        if found is None:
            continue
        tail = found.group(1).strip()
        tail_lines = [piece.strip() for piece in tail.splitlines() if piece.strip()]
        if tail_lines:
            return tail_lines[0]

    nonblank = [row.strip() for row in body.splitlines() if row.strip()]
    if not nonblank:
        return ""

    # No marker: walk backwards to find the last line worth returning.
    for row in reversed(nonblank):
        cleaned = normalize_basic_answer(row).lower()
        if not cleaned or cleaned in _FLUFF_LINES or len(cleaned) > 200:
            continue
        return row

    return nonblank[-1]
def _strip_outer_quotes(text: str) -> str:
    """Remove one pair of matching outer single or double quotes, if present."""
    if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
        return text[1:-1].strip()
    return text


def normalize_basic_answer(text: str | None) -> str:
    """Return *text* with question-independent decoration removed.

    Cleanup steps, in order:
    - trim the ends and collapse every whitespace run to a single space
    - drop a leading "Answer:" / "Final answer:" label (case-insensitive)
    - drop one pair of matching outer quotes
    - drop one trailing period, then check for outer quotes again so that a
      period written outside the quotes (``"Paris".``) does not leave the
      quotes behind

    ``None`` and blank input give ``""``. Other non-string input is
    converted with ``str()`` first.
    """
    if text is None:
        return ""

    text = str(text).strip()
    if not text:
        return ""

    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
    text = _strip_outer_quotes(text)

    # A string that ends in "." can never be a complete decimal such as
    # "3.14", so no numeric guard is needed before dropping the period.
    if text.endswith("."):
        text = text[:-1].strip()
        # The period may have been hiding a closing quote.
        text = _strip_outer_quotes(text)

    return text
| def normalize_final_answer(*args: str) -> str: | |
| """ | |
| Backward-compatible normalizer. | |
| Supports: | |
| - normalize_final_answer(text) | |
| - normalize_final_answer(question, text) | |
| """ | |
| if len(args) == 1: | |
| question = "" | |
| text = args[0] | |
| elif len(args) == 2: | |
| question, text = args | |
| else: | |
| return "" | |
| text = normalize_basic_answer(text) | |
| if not text: | |
| return "" | |
| q = (question or "").lower() | |
| # Remove outer labels once more, conservatively | |
| text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip() | |
| # first name only | |
| if "give only the first name" in q or "first name only" in q: | |
| tokens = text.split() | |
| if tokens: | |
| text = tokens[0] | |
| # last name only | |
| if "last names only" in q or "use their last names only" in q: | |
| parts = [part.strip() for part in text.split(",") if part.strip()] | |
| if parts: | |
| cleaned_parts: list[str] = [] | |
| for part in parts: | |
| tokens = part.split() | |
| cleaned_parts.append(tokens[-1] if tokens else part) | |
| text = ", ".join(cleaned_parts) | |
| # surname only | |
| if "what is the surname" in q or "surname of" in q: | |
| tokens = text.split() | |
| if tokens: | |
| text = tokens[-1] | |
| # city only | |
| if "city name without abbreviations" in q or "just give me the city name" in q: | |
| text = re.split(r"[,;()\-]", text)[0].strip() | |
| # IOC code | |
| if "ioc country code" in q: | |
| text = text.strip().upper() | |
| # algebraic notation | |
| if "algebraic notation" in q: | |
| text = text.strip().split()[0] | |
| # comma-separated list formatting | |
| if ( | |
| "comma separated list" in q | |
| or "comma-separated list" in q | |
| or "comma delimited list" in q | |
| or "comma-delimited list" in q | |
| or "comma separated" in q | |
| ): | |
| parts = [p.strip() for p in re.split(r",|\n", text) if p.strip()] | |
| text = ",".join(parts) | |
| # ascending order | |
| if "ascending order" in q: | |
| try: | |
| nums = [int(x.strip()) for x in text.split(",") if x.strip()] | |
| text = ",".join(str(n) for n in sorted(nums)) | |
| except Exception: | |
| pass | |
| # alphabetical order | |
| if "alphabetical order" in q or "alphabetize" in q or "alphabetized" in q: | |
| parts = [p.strip() for p in text.split(",") if p.strip()] | |
| if parts: | |
| text = ",".join(sorted(parts, key=lambda x: x.lower())) | |
| # two decimal places | |
| if "two decimal places" in q: | |
| compact = text.replace(",", "") | |
| match = re.search(r"-?\d+(?:\.\d+)?", compact) | |
| if match: | |
| try: | |
| value = float(match.group(0)) | |
| text = f"{value:.2f}" | |
| except Exception: | |
| pass | |
| if "nasa award number" in q: | |
| text = text.replace("NASA award number", "").strip() | |
| if "city name without abbreviations" in q: | |
| text = text.replace("St. Petersburg", "Saint Petersburg").strip() | |
| if "use their last names only" in q: | |
| parts = [p.strip() for p in text.split(",") if p.strip()] | |
| last_names = [] | |
| for part in parts: | |
| tokens = part.split() | |
| if tokens: | |
| last_names.append(tokens[-1]) | |
| if last_names: | |
| text = ",".join(last_names) | |
| return text.strip() | |
def is_placeholder_answer(text: str) -> bool:
    """Report whether *text* is a placeholder or fallback output.

    ``None`` counts as a placeholder. Any other value is run through
    ``normalize_basic_answer`` and lower-cased, and is a placeholder when
    the result is empty or one of "placeholder", "n/a" or "unknown".
    """
    if text is None:
        return True

    placeholders = ("", "placeholder", "n/a", "unknown")
    cleaned = normalize_basic_answer(text).lower()
    return cleaned in placeholders