import csv
import io
import json
import html  # For escaping HTML characters
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint identifier used by both loaders.
_CHECKPOINT = "mattshumer/Reflection-Llama-3.1-70B"

# Both objects are created once, when this module is imported, and are then
# shared by every call that reads the module-level `tokenizer` / `model`.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)

def clean_test_case_output(text):
    """
    Converts model output into plain text.

    HTML entities are decoded first, then the result is parsed with
    BeautifulSoup so that any markup is dropped and only the text nodes
    remain, joined by newlines.

    :param text: The raw string to clean.
    :return: The text content with surrounding whitespace stripped.
    """
    decoded = html.unescape(text)
    parsed = BeautifulSoup(decoded, 'html.parser')
    # A newline separator keeps text from adjacent elements on separate lines.
    plain = parsed.get_text(separator="\n")
    return plain.strip()

def generate_testcases(user_story):
    """
    Generates QA test cases for a user story with the module-level model.

    A fixed block of instructions is prepended to the user story, the prompt
    is run through the module-level ``tokenizer`` and ``model``, and only the
    newly generated text (not the echoed prompt) is cleaned and parsed.

    :param user_story: A string representing the user story for which to generate test cases.
    :return: A list of dictionaries. When the model output is a JSON list, its
        items are returned (non-dict items are wrapped as
        ``{"test_case": item}``). Otherwise a single-element list whose
        ``"test_case"`` value is the cleaned output text, or a placeholder
        message when nothing was generated.
    """

    # Standing instructions that guide the model (kept verbatim).
    few_shot_examples = """
    "if its not a DropBury or ODAC Portal User Story, then we perform testing in Tech360 iOS App"
    "Generate as many test cases as possible, minimum 6, maximum it can be anything"
    "Understand the story thoroughly"
    "If it's a DropBury or ODAC Portal User Story, then we perform testing in ODAC Portal"
    """

    # Combine the instructions with the user story for the model to process
    prompt = few_shot_examples + f"\nUser Story: {user_story}\n"

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Greedy decoding. Sampling knobs (temperature / top_p) are deliberately
    # not passed: with do_sample=False they have no effect and only produce
    # warnings about unused generation flags.
    outputs = model.generate(
        **inputs,
        max_length=4096,
        do_sample=False,
    )

    # A causal LM returns the prompt tokens followed by the continuation.
    # Drop the prompt so the instructions and user story are not echoed back
    # into the result (which would also make JSON parsing fail).
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    test_cases_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    if not test_cases_text.strip():
        return [{"test_case": "No test cases generated or output was empty."}]

    # Decode HTML entities and strip markup from the generated text
    test_cases_text = clean_test_case_output(test_cases_text)

    # Cleaning can remove everything (e.g. output that was only markup).
    if not test_cases_text:
        return [{"test_case": "No test cases generated or output was empty."}]

    try:
        # The model may return structured test cases as JSON
        test_cases = json.loads(test_cases_text)
    except json.JSONDecodeError:
        # Fallback: return the raw text if JSON parsing fails
        return [{"test_case": test_cases_text}]

    if isinstance(test_cases, list):
        # Keep the "list of dictionaries" contract even when the JSON list
        # holds bare strings or other scalars.
        return [
            case if isinstance(case, dict) else {"test_case": case}
            for case in test_cases
        ]

    # Valid JSON but not a list: wrap the text like any unstructured output
    return [{"test_case": test_cases_text}]

# Export test cases in CSV format
def export_test_cases(test_cases, format='csv'):
    """
    Renders test cases as CSV text with a single "Test Case" column.

    :param test_cases: A list of dictionaries. Each dictionary's
        ``'test_case'`` value becomes one row; a dictionary without that key
        is written as a whole.
    :param format: Output format; only ``'csv'`` is supported.
    :return: The CSV text (every field quoted), or a message string when
        there is nothing to export or the format is not supported.
    :raises ValueError: If ``test_cases`` is not a list made up entirely of
        dictionaries.
    """
    if not test_cases:
        return "No test cases to export."

    if format != 'csv':
        return f"Unsupported format: {format}"

    # Validate every element before touching it, so malformed input raises
    # the documented ValueError rather than an AttributeError from `.get`.
    if not isinstance(test_cases, list) or not all(
        isinstance(case, dict) for case in test_cases
    ):
        raise ValueError("Test cases must be a list of dictionaries for CSV export.")

    rows = [{'Test Case': case.get('test_case', case)} for case in test_cases]

    output = io.StringIO()
    csv_writer = csv.DictWriter(output, fieldnames=['Test Case'], quoting=csv.QUOTE_ALL)
    csv_writer.writeheader()
    csv_writer.writerows(rows)
    return output.getvalue()

# Save test cases as a CSV file
def save_test_cases_as_file(test_cases, format='csv'):
    """
    Writes test cases to ``test_cases.csv`` in the current working directory.

    The file has a single "Test Case" column. Each dictionary's
    ``'test_case'`` value becomes one row; a dictionary without that key is
    written as a whole.

    :param test_cases: An iterable of dictionaries.
    :param format: Output format; only ``'csv'`` is supported.
    :return: A status message: nothing to save, unsupported format, or
        confirmation that the file was saved.
    :raises ValueError: If any test case is not a dictionary.
    """
    if not test_cases:
        return "No test cases to save."

    if format != 'csv':
        return f"Unsupported format: {format}"

    # Build and validate the rows before opening the file, so bad input
    # cannot leave a truncated, header-only file behind.
    cases = list(test_cases)
    if not all(isinstance(case, dict) for case in cases):
        raise ValueError("Test cases must be dictionaries to be saved as CSV.")
    rows = [{'Test Case': case.get('test_case', case)} for case in cases]

    # Explicit UTF-8 so non-ASCII text is written the same way regardless of
    # the platform's default encoding.
    with open('test_cases.csv', 'w', newline='', encoding='utf-8') as file:
        dict_writer = csv.DictWriter(file, fieldnames=['Test Case'])
        dict_writer.writeheader()
        dict_writer.writerows(rows)
    return f'{format} file saved'