Spaces:

naacl-anonymous
/

selective_pre_translation

Runtime error

App Files Files Community

Anonymous committed on Sep 29, 2024

Commit

208053f

1 Parent(s): f18411b

add files

Browse files

Files changed (6) hide show

app.py +211 -0
generate_prompt.py +642 -0
tasks/ner.py +132 -0
tasks/nli.py +496 -0
tasks/qa.py +770 -0
tasks/summarization.py +149 -0

app.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import gradio as gr
+import os
+from openai import OpenAI
+from generate_prompt import construct_generic_prompt, recommend_config
+# Task identifiers. The same strings are used as the "Task" dropdown choices
+# and are compared against the selected task in the callbacks below.
+QA = "QA"
+SUMMARIZATION = "Summarization"
+NLI = "NLI"
+NER = "NER"
+# Dataset names offered in the "Dataset" dropdown, keyed by task.
+tasks_datasets = {
+    QA: ["XQuad", "Indicqa"],
+    SUMMARIZATION: ["XLSum", "HeSum"],
+    NLI: ["XNLI"],
+    NER: ["MasakaNER", "WikiANN"]
+}
+# Choices for the "Source Language" dropdown.
+languages = [
+    "English", "Spanish", "French", "German", "Chinese", "Japanese", "Korean", "Italian",
+    "Portuguese", "Russian", "Arabic", "Hindi", "Bengali", "Turkish", "Vietnamese", "Polish",
+    "Dutch", "Indonesian", "Malay", "Thai", "Greek", "Swedish", "Hungarian", "Finnish",
+    "Danish", "Norwegian", "Hebrew", "Czech", "Slovak", "Bulgarian", "Romanian", "Serbian",
+    "Croatian", "Ukrainian", "Lithuanian", "Latvian", "Estonian", "Filipino", "Icelandic",
+    "Irish", "Welsh", "Maltese", "Swahili", "Zulu", "Afrikaans"
+]
+def get_datasets(task):
+    """Return the dataset names registered for ``task``, or an empty list if the task is unknown."""
+    if task in tasks_datasets:
+        return tasks_datasets[task]
+    return []
+with gr.Blocks() as demo:
+    # Page title.
+    with gr.Row():
+        gr.Markdown("## Multilingual Prompt Generator")
+    with gr.Row():
+        # Left column: free-text instruction and model settings.
+        with gr.Column(scale=2):
+            instruction = gr.Textbox(label="Instruction")
+            openai_key = gr.Textbox(label="OpenAI API key", type="password")
+            model = gr.Textbox(label="Model", placeholder="Enter model name (e.g., gpt-4-vision-preview)")
+            model_type = gr.Dropdown(label="Model Type", choices=["Multilingual", "English"], value='English')
+            config_recommendation = gr.Button("Recommend Configuration")
+        # Right column: task, source language and prompt-configuration controls.
+        with gr.Column():
+            task = gr.Dropdown(label="Task", choices=list(tasks_datasets.keys()), value=QA)
+            language = gr.Dropdown(label="Source Language", choices=languages, value="English")
+            zero_shot = gr.Checkbox(label="Zero-shot", value=False)
+            # Language ("English" or the source language) used for each prompt part.
+            with gr.Accordion("Prompt Configuration Selection", open=False):
+                prefix_selection = gr.Dropdown(["English", "Source"], label="prefix", value='English')
+                context_selection = gr.Dropdown(["English", "Source"], label="context", value='English')
+                examples_selection = gr.Dropdown(["English", "Source"], label="examples", value='English')
+                output_selection = gr.Dropdown(["English", "Source"], label="output", value='English')
+            with gr.Accordion("Few Shot - Select Type of Examples ", open=False, visible=True) as few_shot:
+                # The initial value has to be one of the choices. The task dropdown
+                # starts on QA, so start on the first QA dataset (the previous
+                # "XlSum" value was not among the QA choices).
+                dataset = gr.Dropdown(label="Dataset", choices=tasks_datasets[QA], value=tasks_datasets[QA][0])
+                num_examples = gr.Slider(label="Number of examples in context", minimum=1, maximum=10, step=1, value=3)
+    # Task-specific inputs. Only the QA pair starts visible, matching the
+    # default task; the others are hidden until the task changes.
+    with gr.Row():
+        question = gr.Textbox(label="Question", visible=True)
+        context = gr.Textbox(label="Context", visible=True)
+        text = gr.Textbox(label="Text", visible=False)
+        sentence = gr.Textbox(label="Sentence", visible=False)
+        hypothesis = gr.Textbox(label="Hypothesis", visible=False)
+        premise = gr.Textbox(label="Premise", visible=False)
+    # Read-only outputs.
+    with gr.Row():
+        config_prompt = gr.Textbox(label="Recommended Configuration", interactive=False,
+                            placeholder="Recommended Configuration for this scenerio")
+    generate_button = gr.Button("Generate Prompt")
+    with gr.Row():
+        prompt = gr.Textbox(label="Generated Prompt", interactive=False, placeholder="Generated prompt will appear here.")
+    def update_datasets(selected_task):
+        """Refresh the dataset dropdown after the task changes.
+        Args:
+            selected_task: Task name chosen in the "Task" dropdown.
+        Returns:
+            A Dropdown update carrying the datasets of that task, with the
+            first one selected (or no selection when the task has none).
+        """
+        datasets = get_datasets(selected_task)
+        # Reset the value together with the choices; otherwise the dataset
+        # picked for the previous task stays selected although it is not one
+        # of the new choices.
+        return gr.Dropdown(choices=datasets, value=datasets[0] if datasets else None)
+    def toggle_task_inputs(selected_task):
+        """Show only the input boxes that belong to the selected task.
+        Returns a tuple of six visibility updates. Position 0-1 are shown for
+        QA, position 2 for summarization, position 3 for NER, and positions
+        4-5 for any other task.
+        """
+        if selected_task == QA:
+            shown = (0, 1)
+        elif selected_task == SUMMARIZATION:
+            shown = (2,)
+        elif selected_task == NER:
+            shown = (3,)
+        else:
+            shown = (4, 5)
+        return tuple(gr.update(visible=slot in shown) for slot in range(6))
+    def toggle_num_examples(zero_shot_value):
+        """Return a visibility update that hides the target when zero-shot is enabled."""
+        show = not zero_shot_value
+        return gr.update(visible=show)
+    def update_language_selection(language):
+        """Return four choice updates, each offering 'English' and the selected language.
+        The choices come from a set, so a selected language of 'English'
+        yields a single choice and the order of two choices is not fixed.
+        """
+        return tuple(gr.update(choices=list({'English', language})) for _ in range(4))
+    def generatePrompt(instruction, num_examples, zero_shot,
+            task, selected_language, dataset, prefix_selection, context_selection, examples_selection, output_selection,
+            text,  question, context, sentence, hypothesis, premise):
+        """Collect the UI values into a config and a test example, then build the prompt.
+        The lower-cased "context" selection is stored under the 'input' key and
+        the "examples" selection under the 'context' key of the config.
+        """
+        config = dict(
+            prefix=str.lower(prefix_selection),
+            input=str.lower(context_selection),
+            context=str.lower(examples_selection),
+            output=str.lower(output_selection),
+        )
+        # Pick the fields that make up the example for the chosen task; any
+        # task other than QA, summarization and NER gets the NLI pair.
+        if task == QA:
+            text_example = dict(context=context, question=question)
+        elif task == SUMMARIZATION:
+            text_example = dict(text=text)
+        elif task == NER:
+            text_example = dict(tokens=sentence)
+        else:
+            text_example = dict(hypothesis=hypothesis, premise=premise)
+        print(text_example)
+        return construct_generic_prompt(
+            task, instruction, text_example, zero_shot, num_examples, selected_language, dataset, config
+        )
+    def respond(message, openai_key, url, chat_history, model, config_input, config_prefix, config_context,
+                config_output, task, dataset, language, num_examples, zero_shot):
+        """Send one chat message to the OpenAI API and append the reply to the history.
+        Args:
+            message: User text.
+            openai_key: API key used for this request only.
+            url: Image URL sent alongside the text.
+            chat_history: List that receives the ``(message, reply)`` pair.
+            model: Model name passed to the API.
+            config_input, config_prefix, config_context, config_output: Prompt
+                configuration values; ``config_context`` is split on ", ".
+            task, dataset, language, num_examples, zero_shot: Extra settings
+                forwarded in the request content.
+        Returns:
+            ``("", chat_history)``.
+        """
+        # Hand the key to this client only. Writing it into os.environ would
+        # expose one user's key to every other request served by this process.
+        client = OpenAI(api_key=openai_key)
+        config = {
+            "input": config_input,
+            "prefix": config_prefix,
+            "context": config_context.split(', '),
+            "output": config_output,
+            "language": language,
+            "num_examples": num_examples,
+            "zero_shot": zero_shot
+        }
+        # NOTE(review): "config", "task" and "dataset" are not content-part
+        # types I can confirm the Chat Completions API accepts; verify before
+        # wiring this callback to the UI.
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": message},
+                        {"type": "image_url", "image_url": url},
+                        {"type": "config", "config": config},
+                        {"type": "task", "text": task},
+                        {"type": "dataset", "text": dataset}
+                    ],
+                },
+            ],
+            max_tokens=1000,
+        )
+        out = response.choices[0].message.content
+        chat_history.append((message, out))
+        return "", chat_history
+    # Wire the callbacks to the components defined above.
+    # Changing the source language replaces the choices of the four
+    # prompt-configuration dropdowns.
+    language.change(fn=update_language_selection, inputs=language, outputs=[prefix_selection, context_selection, examples_selection, output_selection])
+    # The zero-shot checkbox toggles the visibility of both the few-shot
+    # accordion and the example-count slider.
+    zero_shot.change(fn=toggle_num_examples, inputs=zero_shot, outputs=few_shot)
+    zero_shot.change(fn=toggle_num_examples, inputs=zero_shot, outputs=num_examples)
+    # Changing the task updates the dataset dropdown and the visible inputs.
+    task.change(fn=update_datasets, inputs=task, outputs=dataset)
+    # The outputs order must match the tuple returned by toggle_task_inputs.
+    task.change(fn=toggle_task_inputs, inputs=task, outputs=[
+        question, context, text, sentence, hypothesis, premise,
+    ])
+    # The inputs order must match the parameter order of generatePrompt.
+    generate_button.click(
+        generatePrompt,
+        inputs=[
+            instruction, num_examples, zero_shot,
+            task, language, dataset, prefix_selection, context_selection, examples_selection, output_selection,
+            text, question, context, sentence, hypothesis, premise
+        ],
+        outputs=[prompt]
+    )
+    # recommend_config receives (task, language, model_type); its result is
+    # shown in the read-only "Recommended Configuration" box.
+    config_recommendation.click(
+        recommend_config,
+        inputs=[
+            task,
+            language,
+            model_type
+        ],
+        outputs=[config_prompt]
+    )
+# Launch the Gradio app only when this file is run as a script.
+if __name__ == '__main__':
+    demo.launch()

generate_prompt.py ADDED Viewed

	@@ -0,0 +1,642 @@

+import csv
+import enum
+import json
+import logging
+import os
+import re
+import string
+import sys
+import unicodedata
+from typing import Any, Dict, List, NewType, Union
+import numpy as np
+import openai
+import pandas as pd
+import requests
+import yaml
+from datasets import Dataset, load_dataset
+from easygoogletranslate import EasyGoogleTranslate
+from langchain.prompts import FewShotPromptTemplate, PromptTemplate
+from tqdm import tqdm
+from yaml.loader import SafeLoader
+from selective_pre_translation.tasks import qa, summarization, ner, nli
+# from models.model_completion import gpt3x_completion, gemini_completion
+# Resource level of a language; _get_language_type picks Low or High from the
+# language's word count.
+class LanguageType(enum.Enum):
+    Low = "Low"
+    High = "High"
+# Kind of model the prompt is written for; recommend_config compares the
+# incoming model_type string with these values.
+class ModelType(enum.Enum):
+    English = "English"
+    Multilingual = "Multilingual"
+def get_entities_gpt3_long(prompt):
+    """Send ``prompt`` as a single user message to the "chatgpt" engine and return the reply text."""
+    messages = [{"role": "user", "content": prompt}]
+    completion = openai.ChatCompletion.create(
+        engine="chatgpt", temperature=0, messages=messages
+    )
+    first_choice = completion["choices"][0]
+    return first_choice["message"]["content"]
+def gpt3x_completion(
+        prompt: Union[str, List[Dict[str, str]]],
+) -> str:
+    """Return the "gpt35-16k" engine's reply to ``prompt``.
+    Args:
+        prompt: Content of the single user message sent to the model.
+    Returns:
+        The text of the first choice in the response.
+    The API key is taken from the existing openai configuration. This function
+    no longer overwrites OPENAI_API_KEY with an empty string, which wiped the
+    key for everything else running in the process.
+    """
+    response = openai.ChatCompletion.create(
+        engine="gpt35-16k",
+        # Temperature 0 is passed so repeated calls use the same sampling setting.
+        temperature=0,
+        messages=[
+            {"role": "user", "content": prompt}
+        ]
+    )
+    return response['choices'][0]['message']['content']
+def mixtral_completion(prompt):
+    """Query Mixtral-8x7B-Instruct through the Together chat-completions endpoint.
+    Args:
+        prompt: Text sent as the single user message (formatted with ``str``).
+    Returns:
+        The reply text on HTTP 200. On any other status the error is printed
+        and ``None`` is returned.
+    """
+    url = "https://api.together.xyz/v1/chat/completions"
+    # Read the key from the environment instead of editing it into the source;
+    # falls back to the previous empty placeholder when the variable is unset.
+    together_api_key = os.environ.get("TOGETHER_API_KEY", "")
+    payload = {
+        "temperature": 0,
+        "max_tokens": 30,
+        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "messages": [{"role": "user", "content": f"{prompt}"}],
+    }
+    headers = {
+        "Authorization": f"Bearer {together_api_key}",
+        "Content-Type": "application/json",
+    }
+    # Without a timeout, requests waits indefinitely on a stalled connection.
+    response = requests.post(url, json=payload, headers=headers, timeout=60)
+    if response.status_code == 200:
+        return response.json()["choices"][0]["message"]["content"]
+    print(f"Error: {response.status_code} - {response.text}")
+    return None
+# Language name -> code used to build the XQuAD configuration name
+# ("xquad.<code>") in load_qa_dataset.
+XQUAD_LANG2CODES = {
+    "bengali": "bn",
+    "korean": "ko",
+    "swahili": "sw",
+    "english": "en",
+    "indonesian": "id",
+    "arabic": "ar",
+    "finnish": "fi",
+    "telugu": "te",
+    "russian": "ru",
+    "german": "de",
+    "greek": "el",
+    "hindi": "hi",
+    "vietnamese": "vi",
+    "romanian": "ro",
+}
+# Language name -> code used to build the IndicQA configuration name
+# ("indicqa.<code>") in load_qa_dataset.
+# NOTE(review): the "indicqa" key is not a language name and maps to the same
+# code as "assamese" -- confirm whether it is intentional.
+INDICQA_LANG2CODES = {
+    "indicqa": "as",
+    "bengali": "bn",
+    "gujarati": "gu",
+    "hindi": "hi",
+    "kannada": "kn",
+    "malayalam": "ml",
+    "marathi": "mr",
+    "odia": "or",
+    "punjabi": "pa",
+    "tamil": "ta",
+    "telugu": "te",
+    "assamese": "as",
+}
+# Every character whose Unicode general category starts with "P", plus the
+# ASCII punctuation from string.punctuation. Built once at import time by
+# scanning code points 0 .. sys.maxunicode - 1.
+PUNCT = {
+    chr(i)
+    for i in range(sys.maxunicode)
+    if unicodedata.category(chr(i)).startswith("P")
+}.union(string.punctuation)
+# Language-code lists; not referenced in the visible part of this file.
+# NOTE(review): presumably they select the tokenisation used for answer
+# scoring -- confirm against the code that reads them.
+WHITESPACE_LANGS = ["en", "es", "hi", "vi", "de", "ar"]
+MIXED_SEGMENTATION_LANGS = ["zh"]
+# Maps the first "-"-separated part of a TyDiQA example id to a language code
+# (see the "tydiqa" branch of load_qa_dataset).
+TYDIQA_LANG2CODES = {
+    "bengali": "bn",
+    "korean": "ko",
+    "swahili": "sw",
+    "english": "en",
+    "indonesian": "id",
+    "arabic": "ar",
+    "finnish": "fi",
+    "telugu": "te",
+    "russian": "ru",
+    "assamese": "as",
+    "persian": "fa",
+}
+# Obtain the logger through getLogger so it is registered in the logging
+# hierarchy and honours handlers/levels configured by the application; a
+# directly instantiated logging.Logger is detached from that configuration.
+logger = logging.getLogger("Xlsum_task")
+# Language name -> language code passed to EasyGoogleTranslate by the
+# translation helpers below. The repeated "hindi" and "telugu" entries were
+# removed; they carried the same values, so lookups are unchanged.
+LANGUAGE_TO_SUFFIX = {
+    "chinese_simplified": "zh-CN",
+    "french": "fr",
+    "portuguese": "pt",
+    "english": "en",
+    "arabic": "ar",
+    "hindi": "hi",
+    "indonesian": "id",
+    "amharic": "am",
+    "bengali": "bn",
+    "telugu": "te",
+    "burmese": "my",
+    "german": "de",
+    "greek": "el",
+    "tamil": "ta",
+    "assamese": "as",
+    "vietnamese": "vi",
+    "russian": "ru",
+    "romanian": "ro",
+    "malayalam": "ml",
+    "persian": "fa",
+}
+PARAMS = NewType("PARAMS", Dict[str, Any])
+def read_parameters(args_path) -> PARAMS:
+    """Parse the YAML file at ``args_path`` with the safe loader and return its content."""
+    with open(args_path) as stream:
+        return yaml.load(stream, Loader=SafeLoader)
+def load_qa_dataset(dataset_name, lang, split, translate_test=False, limit=5):
+    """Load a QA dataset split and keep at most ``limit`` leading examples.
+    Args:
+        dataset_name: One of "indicqa", "xquad", "tydiqa" or "mlqa".
+        lang: Language selector. "indicqa" and "xquad" use it as a key of
+            their *_LANG2CODES table; "mlqa" puts it into the configuration
+            name; "tydiqa" compares it with the derived "lang" column.
+        split: Requested split. For "indicqa"/"xquad", "train" loads SQuAD
+            data instead; "xquad" otherwise always reads "validation"; "mlqa"
+            switches "train" to "validation".
+        translate_test: For "mlqa", load the translate-test configuration.
+        limit: Maximum number of examples to return.
+    Returns:
+        The first ``min(limit, len(dataset))`` examples.
+    Raises:
+        NotImplementedError: If ``dataset_name`` is not supported.
+    """
+    if dataset_name == "indicqa":
+        if split != "train":
+            dataset = load_dataset(
+                "ai4bharat/IndicQA", f"indicqa.{INDICQA_LANG2CODES[lang]}"
+            )[split]
+        else:
+            dataset = load_dataset("squad_v2")[split]
+    elif dataset_name == "xquad":
+        if split != "train":
+            dataset = load_dataset("xquad", f"xquad.{XQUAD_LANG2CODES[lang]}")[
+                "validation"
+            ]
+        else:
+            dataset = load_dataset("squad")[split]
+    elif dataset_name == "tydiqa":
+        dataset = load_dataset("tydiqa", "secondary_task")[split]
+        dataset = dataset.map(
+            lambda example: {"lang": TYDIQA_LANG2CODES[example["id"].split("-")[0]]}
+        )
+        # NOTE(review): the derived "lang" column holds a language code, so
+        # this filter only matches when ``lang`` is a code here, unlike the
+        # language names used by the branches above -- confirm with callers.
+        dataset = dataset.filter(lambda example: example["lang"] == lang)
+    elif dataset_name == "mlqa":
+        if split == "train":
+            print("No Training Data for MLQA, switching to validation!")
+            split = "validation"
+        if translate_test:
+            dataset_name = f"mlqa-translate-test.{lang}"
+        else:
+            dataset_name = f"mlqa.{lang}.{lang}"
+        dataset = load_dataset("mlqa", dataset_name)[split]
+    else:
+        raise NotImplementedError()
+    # Clamp the limit: selecting indices beyond the end of a short (for
+    # example filtered) dataset raises instead of returning what is there.
+    return dataset.select(np.arange(min(limit, len(dataset))))
+def construct_prompt(
+        instruction: str,
+        test_example: dict,
+        ic_examples: List[dict],
+        zero_shot: bool,
+        lang: str,
+        config: Dict[Any, Any],
+):
+    """Render the QA prompt for ``test_example`` and return it with its label.
+    The label is the example's "answers" value taken before any translation.
+    When ``config["input"]`` differs from ``lang`` the example is translated
+    into that language before the prompt is formatted.
+    Returns:
+        ``(prompt_text, label)``.
+    """
+    example_prompt = PromptTemplate(
+        input_variables=["context", "question", "answers"],
+        template="Context: {context}\nQuestion: {question}\nAnswers: {answers}",
+    )
+    zero_shot_template = f"{instruction}" + "\n<Context>: {context} \n<Question>: {question} "
+    if zero_shot:
+        prompt = PromptTemplate(
+            input_variables=["question", "context"], template=zero_shot_template
+        )
+    else:
+        prompt = FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="<Context>: {context} \n<Question>: {question} \nAnswers: ?",
+            input_variables=["question", "context"],
+        )
+    label = test_example["answers"]
+    input_language = config["input"]
+    if input_language != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=input_language
+        )
+    rendered = prompt.format(
+        question=test_example["question"], context=test_example["context"]
+    )
+    return rendered, label
+def dump_metrics(
+        lang: str, config: Dict[str, str], f1: float, em: float, metric_logger_path: str
+):
+    """Append one metrics row to the CSV at ``metric_logger_path``.
+    A header row is written first when the file did not exist before the call.
+    Only the first element of ``config["context"]`` is recorded.
+    """
+    is_new_file = not os.path.exists(metric_logger_path)
+    with open(metric_logger_path, "a", newline="") as f:
+        writer = csv.writer(f, delimiter=",")
+        if is_new_file:
+            writer.writerow(["Language", "Prefix", "Input", "Context", "Output", "F1", "Em"])
+        row = [
+            lang,
+            config["prefix"],
+            config["input"],
+            config["context"][0],
+            config["output"],
+            f1,
+            em,
+        ]
+        writer.writerow(row)
+def dump_predictions(idx, response, label, response_logger_file):
+    """Append one prediction record as a JSON line to ``response_logger_file``.
+    Args:
+        idx: Index of the question, stored as "q_idx".
+        response: Model prediction, stored as "prediction".
+        label: Reference answer, stored as "label".
+        response_logger_file: Path of the file to append to.
+    """
+    obj = {"q_idx": idx, "prediction": response, "label": label}
+    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
+    # opened as UTF-8 explicitly; the locale default encoding may not be able
+    # to represent them.
+    with open(response_logger_file, "a", encoding="utf-8") as f:
+        f.write(json.dumps(obj, ensure_ascii=False) + "\n")
+def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    """Translate an English instruction into ``target_language`` (a key of LANGUAGE_TO_SUFFIX)."""
+    target_code = LANGUAGE_TO_SUFFIX[target_language]
+    translator = EasyGoogleTranslate(
+        source_language="en", target_language=target_code, timeout=50
+    )
+    translated = translator.translate(basic_instruction)
+    return translated
+def _translate_prediction_to_output_language(
+        prediction: str, prediction_language: str, output_language: str
+) -> str:
+    """Translate ``prediction`` from ``prediction_language`` into ``output_language``.
+    Both language arguments must be keys of LANGUAGE_TO_SUFFIX.
+    """
+    source_code = LANGUAGE_TO_SUFFIX[prediction_language]
+    target_code = LANGUAGE_TO_SUFFIX[output_language]
+    translator = EasyGoogleTranslate(
+        source_language=source_code, target_language=target_code, timeout=10
+    )
+    translated = translator.translate(prediction)
+    return translated
+def create_instruction(lang: str, expected_output: str):
+    """Return the QA instruction text, translated unless ``lang`` is "english".
+    ``expected_output`` is inserted into the text as the language the answer
+    must be written in.
+    """
+    basic_instruction = (
+        "Answer to the <Question> below, based only to the given <Context>, Follow these instructions:\n"
+        "1. The answer should include only words from the given context\n"
+        "2. The answer must include up to 5 words\n"
+        "3. The answer Should be the shortest as possible\n"
+        f"4. The answer must be in {expected_output} only!, not another language!!!"
+    )
+    if lang == "english":
+        return basic_instruction
+    return _translate_instruction(basic_instruction, target_language=lang)
+def _translate_example(
+        example: Dict[str, str], src_language: str, target_language: str
+):
+    """Translate a QA example from ``src_language`` into ``target_language``.
+    Args:
+        example: Mapping with "question", "context" and "answers"; only the
+            first element of "answers" is translated.
+        src_language: Source language name (lower-cased before lookup).
+        target_language: Target language name (lower-cased before lookup).
+    Returns:
+        A dict with the translated "question", "context" and "answers".
+        Only the first 6000 characters of the context are translated; the
+        rest is dropped.
+    """
+    translator = EasyGoogleTranslate(
+        source_language=LANGUAGE_TO_SUFFIX[str(src_language).lower()],
+        target_language=LANGUAGE_TO_SUFFIX[str(target_language).lower()],
+        timeout=30,
+    )
+    question = translator.translate(example["question"])
+    context = example["context"]
+    # Translate the context in 2000-character windows. Windows that are empty
+    # because the context is shorter are skipped, so no request is made for
+    # an empty string.
+    windows = (context[start:start + 2000] for start in range(0, 6000, 2000))
+    translated_context = "".join(
+        translator.translate(window) for window in windows if window
+    )
+    return {
+        "question": question,
+        "context": translated_context,
+        "answers": translator.translate(example["answers"][0]),
+    }
+def choose_few_shot_examples(
+        train_dataset: Dataset,
+        few_shot_size: int,
+        context: List[str],
+        selection_criteria: str,
+        lang: str,
+) -> List[Dict[str, Union[str, int]]]:
+    """Pick in-context examples and translate those whose target language differs.
+    Args:
+        train_dataset (Dataset): Dataset the examples are drawn from
+        few_shot_size (int): Number of examples to draw
+        context (List[str]): Target language of each returned example, by position
+        selection_criteria (str): "first_k" takes the leading rows, "random" samples
+            row indices with replacement; any other value draws nothing
+        lang (str): Language the drawn examples are in
+    Returns:
+        List[Dict[str, Union[str, int]]]: One example per entry of ``context``
+    """
+    if selection_criteria == "first_k":
+        example_idxs = list(range(few_shot_size))
+    elif selection_criteria == "random":
+        drawn = np.random.choice(len(train_dataset), size=few_shot_size, replace=True)
+        example_idxs = drawn.astype(int).tolist()
+    else:
+        example_idxs = []
+    ic_examples = []
+    for idx in example_idxs:
+        row = train_dataset[idx]
+        ic_examples.append(
+            {
+                "question": row["question"],
+                "context": row["context"],
+                "answers": row["answers"]["text"],
+            }
+        )
+    selected_examples = []
+    for position, ic_language in enumerate(context):
+        candidate = ic_examples[position]
+        if ic_language != lang:
+            candidate = _translate_example(
+                example=candidate,
+                src_language=lang,
+                target_language=ic_language,
+            )
+        selected_examples.append(candidate)
+    return selected_examples
+def normalize_answer(s):
+    """Lower-case ``s``, drop punctuation, drop the articles a/an/the, and collapse whitespace."""
+    lowered = s.lower()
+    without_punct = "".join(ch for ch in lowered if ch not in PUNCT)
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", without_punct)
+    return " ".join(without_articles.split())
+def process_test_example(
+        test_data, config_header, idx, test_example, config, zero_shot, lang, params
+):
+    """Build the prompt for one test example, query the model and log the prediction.
+    The prediction is appended to
+    ``<response_logger_root>/<model>/<lang>/<config_header>.csv`` (taken from
+    ``params``). Any exception is caught and reported with ``print``; nothing
+    is returned.
+    """
+    try:
+        instruction = create_instruction(
+            lang=config["prefix"], expected_output=config["output"]
+        )
+        text_example = dict(
+            question=test_example["question"],
+            context=test_example["context"],
+            answers=test_example["answers"]["text"],
+        )
+        if zero_shot:
+            ic_examples = []
+        else:
+            # In-context examples are drawn from the same data as the test example.
+            ic_examples = choose_few_shot_examples(
+                train_dataset=test_data,
+                few_shot_size=len(config["context"]),
+                context=config["context"],
+                selection_criteria="random",
+                lang=params["selected_language"],
+            )
+        prompt, label = construct_prompt(
+            instruction=instruction,
+            test_example=text_example,
+            ic_examples=ic_examples,
+            zero_shot=zero_shot,
+            lang=lang,
+            config=config,
+        )
+        pred = gpt3x_completion(prompt=prompt)
+        print(pred)
+        logger.info("Saving prediction to persistent volume")
+        output_dir = f"{params['response_logger_root']}/{params['model']}/{lang}"
+        os.makedirs(output_dir, exist_ok=True)
+        dump_predictions(
+            idx=idx,
+            response=pred,
+            label=label,
+            response_logger_file=f"{params['response_logger_root']}/{params['model']}/{lang}/{config_header}.csv",
+        )
+    except Exception as e:
+        print(f"Error processing example {idx}: {e}")
+def run_one_configuration(selected_language, config, zero_shot, dataset_name, limit=10):
+    """Run the QA pipeline on the loaded test data and return the first prediction.
+    The function returns as soon as one example is processed without raising,
+    so later examples are not visited. Examples that raise are reported with
+    ``print`` and skipped. ``None`` is returned when no example succeeds.
+    """
+    split = "validation" if dataset_name == "xquad" else "test"
+    test_data = load_qa_dataset(
+        dataset_name=dataset_name,
+        lang=selected_language,
+        split=split,
+        limit=limit,
+    )
+    for _, test_example in tqdm(enumerate(test_data)):
+        try:
+            instruction = create_instruction(
+                lang=config["prefix"], expected_output=config["output"]
+            )
+            text_example = dict(
+                question=test_example["question"],
+                context=test_example["context"],
+                answers=test_example["answers"]["text"],
+            )
+            if zero_shot:
+                ic_examples = []
+            else:
+                ic_examples = choose_few_shot_examples(
+                    train_dataset=test_data,
+                    few_shot_size=len(config["context"]),
+                    context=config["context"],
+                    selection_criteria="random",
+                    lang=selected_language,
+                )
+            prompt, label = construct_prompt(
+                instruction=instruction,
+                test_example=text_example,
+                ic_examples=ic_examples,
+                zero_shot=zero_shot,
+                lang=selected_language,
+                config=config,
+            )
+            return gpt3x_completion(prompt=prompt)
+        except Exception as e:
+            print(f"Found an exception {e}, continue to the next example")
+# Task identifiers compared against the ``task`` argument in
+# construct_generic_prompt and recommend_config.
+QA = "QA"
+SUMMARIZATION = "Summarization"
+NLI = "NLI"
+NER = "NER"
+def construct_generic_prompt(task, instruction, test_example, zero_shot, num_examples, selected_language, dataset,
+                             config):
+    """Dispatch prompt construction to the module that handles ``task``.
+    Args:
+        task: Task identifier; anything other than summarization, NER and QA
+            is handled by the NLI module.
+        instruction: Prompt prefix supplied by the user.
+        test_example: Task-specific example fields.
+        zero_shot: Whether to build a prompt without in-context examples.
+        num_examples: Number of in-context examples.
+        selected_language: Source language; passed on lower-cased.
+        dataset: Dataset name; forwarded to the summarization and NER builders.
+        config: Language configuration of the prompt parts.
+    Returns:
+        The value returned by the selected ``construct_prompt``.
+    """
+    print(task)
+    # Arguments every task-specific builder receives.
+    common = dict(
+        instruction=instruction,
+        test_example=test_example,
+        zero_shot=zero_shot,
+        num_examples=num_examples,
+        lang=str(selected_language).lower(),
+        config=config,
+    )
+    if task == SUMMARIZATION:
+        prompt = summarization.construct_prompt(dataset=dataset, **common)
+    elif task == NER:
+        # ner.construct_prompt declares ``dataset`` as a required parameter;
+        # calling it without that argument raised a TypeError.
+        prompt = ner.construct_prompt(dataset=dataset, **common)
+    elif task == QA:
+        prompt = qa.construct_prompt(**common)
+    else:
+        prompt = nli.construct_prompt(**common)
+    return prompt
+def _get_language_type(language: str):
+    """Classify ``language`` as low- or high-resource from its word count.
+    The count is read from the first row of utils/languages_by_word_count.csv
+    whose 'Language' column equals ``language``; counts below 150276400 are
+    classified as low.
+    """
+    table = pd.read_csv("utils/languages_by_word_count.csv")
+    matches = table.loc[table['Language'] == language, 'number of words']
+    number_of_words = matches.iloc[0]
+    print(number_of_words)
+    if number_of_words < 150276400:
+        return LanguageType.Low
+    return LanguageType.High
+class Config:
+    """Language setting for each of the four prompt parts; every part defaults to "source"."""
+    def __init__(self, prefix="source", context="source", examples="source", output="source"):
+        self.prefix, self.context = prefix, context
+        self.examples, self.output = examples, output
+    def set(self, prefix=None, context=None, examples=None, output=None):
+        """Overwrite the parts given a truthy value; falsy arguments leave a part unchanged."""
+        requested = {"prefix": prefix, "context": context, "examples": examples, "output": output}
+        for part, value in requested.items():
+            if value:
+                setattr(self, part, value)
+    def to_dict(self):
+        """Return the four settings as a dict keyed by part name."""
+        return {part: getattr(self, part) for part in ("prefix", "context", "examples", "output")}
+def recommend_config(task, lang, model_type):
+    """Return the recommended language configuration for a task, language and model type.
+    Starts from an all-"source" Config and overrides parts depending on the
+    task, on whether ``model_type`` is the English model type, and on the
+    resource level of ``lang``. Returns the configuration as a dict.
+    """
+    print(task)
+    print(model_type)
+    language_type = _get_language_type(lang)
+    config = Config()
+    print(language_type)
+    english_model = model_type == ModelType.English.value
+    high_resource = language_type == LanguageType.High
+    all_source = dict(prefix='source', context='source', examples='source', output='source')
+    if task == QA:
+        if english_model:
+            config.set(**all_source)
+        else:
+            config.set(prefix='english', context='source', examples='source', output='source')
+    elif task == NER:
+        if english_model:
+            config.set(**all_source)
+        elif high_resource:
+            config.set(prefix='english', context='source', examples='source', output='source')
+        else:
+            config.set(prefix='english', context='source', examples='source', output='english')
+    elif task == NLI:
+        if english_model:
+            config.set(**all_source)
+        elif high_resource:
+            print("here")
+            config.set(prefix='english', context='source', examples='english')
+        else:
+            print("here1")
+            config.set(prefix='english', context='english', examples='english')
+    elif task == SUMMARIZATION:
+        config.set(context='english')
+    return config.to_dict()

tasks/ner.py ADDED Viewed

	@@ -0,0 +1,132 @@

+from typing import List, Dict, Any
+from easygoogletranslate import EasyGoogleTranslate
+from langchain.prompts import PromptTemplate, FewShotPromptTemplate
+# Language name -> Google Translate language code. The repeated "english"
+# entry was removed; it carried the same value, so lookups are unchanged.
+LANGUAGE_TO_GOOGLE_TRANSLATE_MARK = {
+    "english": "en",
+    "bambara": "bm",
+    "ewe": "ee",
+    "hausa": "ha",
+    "igbo": "ig",
+    "kinyarwanda": "rw",
+    "chichewa": "ny",
+    "twi": "ak",
+    "yoruba": "yo",
+    "slovak": "sk",
+    "serbian": "sr",
+    "swedish": "sv",
+    "vietnamese": "vi",
+    "italian": "it",
+    "portuguese": "pt",
+    "chinese": "zh",
+    "french": "fr",
+}
+# Language name -> short code; read by _translate_instruction below.
+# NOTE(review): the three-letter codes look like dataset configuration names
+# rather than translation codes -- confirm the intended use. The identifier is
+# misspelled ("LANGAUGE") but is kept because other code refers to it.
+LANGAUGE_TO_PREFIX = {
+    "bambara": "bam",
+    "ewe": "ewe",
+    "fon": "fon",
+    "hausa": "hau",
+    "igbo": "ibo",
+    "kinyarwanda": "kin",
+    "chichewa": "nya",
+    "twi": "twi",
+    "yoruba": "yor",
+    "slovak": "sk",
+    "serbian": "sr",
+    "swedish": "sv",
+    "vietnamese": "vi",
+    "italian": "it",
+    "portuguese": "pt",
+    "chinese": "zh",
+    "english": "en",
+    "french": "fr"
+}
+def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    """Translate an English instruction into ``target_language``.
+    Args:
+        basic_instruction: English instruction text.
+        target_language: Lower-case language name.
+    Returns:
+        The translated instruction.
+    Raises:
+        KeyError: If the language is in neither code table.
+    """
+    # The translator needs Google Translate codes ("bm", "ha", ...), which
+    # live in LANGUAGE_TO_GOOGLE_TRANSLATE_MARK. LANGAUGE_TO_PREFIX holds
+    # different codes for several languages ("bam", "hau", ...) and is only
+    # kept as a fallback for languages missing from the Google table.
+    if target_language in LANGUAGE_TO_GOOGLE_TRANSLATE_MARK:
+        target_code = LANGUAGE_TO_GOOGLE_TRANSLATE_MARK[target_language]
+    else:
+        target_code = LANGAUGE_TO_PREFIX[target_language]
+    translator = EasyGoogleTranslate(
+        source_language="en",
+        target_language=target_code,
+        timeout=10,
+    )
+    return translator.translate(basic_instruction)
+def create_instruction(lang: str, expected_output: str):
+    """Return the NER instruction text, translated unless ``lang`` is "english".
+    ``expected_output`` is inserted into the text as the language the
+    entities should be written in.
+    """
+    basic_instruction = f"""You are an NLP assistant whose
+                            purpose is to perform Named Entity Recognition
+                            (NER). You will need to give each entity a tag, from the following:
+                            PER means a person, ORG means organization.
+                            LOC means a location entity.
+                            The output should be a list of tuples of the format:
+                            ['Tag: Entity', 'Tag: Entity'] for each entity in the sentence.
+                            The entities should be in {expected_output} language"""
+    if lang == "english":
+        return basic_instruction
+    return _translate_instruction(basic_instruction, target_language=lang)
+def construct_prompt(
+    instruction: str,
+    test_example: dict,
+    zero_shot: bool,
+    dataset: str,
+    num_examples: int,
+    lang: str,
+    config: Dict[str, str],
+):
+    """Build the prompt text for one NER test example.
+    Args:
+        instruction: Prompt prefix; when empty, one is generated with
+            create_instruction.
+        test_example: Input example; the sentence is read from "text", or
+            from "tokens" when "text" is absent.
+        zero_shot: When true, no in-context examples are loaded or added.
+        dataset: Dataset name (not used by this function).
+        num_examples: Number of in-context examples for the few-shot prompt.
+        lang: Source language of the example.
+        config: Language choices; "prefix", "context" and "input" are read.
+    Returns:
+        The formatted prompt string.
+    NOTE(review): load_xlsum_data, choose_few_shot_examples and
+    _translate_example are not defined or imported in the visible part of
+    this module, so the few-shot path and the translation path still need
+    those helpers. Example data is now loaded only for few-shot prompts, so
+    the zero-shot path no longer depends on the missing loader.
+    """
+    if not instruction:
+        print(lang)
+        # NOTE(review): this passes the source language as the instruction
+        # language and config['prefix'] as the expected output language --
+        # confirm this is the intended argument order.
+        instruction = create_instruction(lang, config['prefix'])
+    # NOTE(review): the example template uses "text"/"summary" fields, which
+    # look carried over from the summarization task -- confirm for NER.
+    example_prompt = PromptTemplate(
+        input_variables=["summary", "text"], template="Text: {text}\nSummary: {summary}"
+    )
+    zero_shot_template = f"""{instruction}""" + "\n Input: {text} " ""
+    print(num_examples)
+    print(lang)
+    ic_examples = []
+    if not zero_shot:
+        test_data = load_xlsum_data(lang=lang, split="test", limit=100)
+        print(test_data)
+        ic_examples = choose_few_shot_examples(
+            train_dataset=test_data,
+            few_shot_size=num_examples,
+            context=[config["context"]] * num_examples,
+            selection_criteria="random",
+            lang=lang,
+        )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="<Text>: {text}",
+            input_variables=["text"],
+        )
+        if not zero_shot
+        else PromptTemplate(input_variables=["text"], template=zero_shot_template)
+    )
+    print("lang", lang)
+    print(config["input"] , lang)
+    if config["input"] != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=config["input"]
+        )
+    print("test_example", prompt)
+    # The app supplies the NER sentence under "tokens"; accept that key as
+    # well as "text" instead of failing with a KeyError on "text".
+    sentence = test_example["text"] if "text" in test_example else test_example["tokens"]
+    return prompt.format(text=sentence)

tasks/nli.py ADDED Viewed

	@@ -0,0 +1,496 @@

+import time
+import csv
+import json
+import multiprocessing as mp
+import os
+from typing import Any, Dict, List, NewType, Optional, Union
+import openai
+import numpy as np
+import requests
+import yaml
+from datasets import Dataset, DatasetDict, load_dataset
+from easygoogletranslate import EasyGoogleTranslate
+from langchain.prompts import FewShotPromptTemplate, PromptTemplate
+from tqdm import tqdm
+from yaml.loader import SafeLoader
+LANGUAGE_TO_SUFFIX = {
+    "chinese_simplified": "zh-CN",
+    "french": "fr",
+    "portuguese": "pt",
+    "english": "en",
+    "arabic": "ar",
+    "hindi": "hi",
+    "indonesian": "id",
+    "amharic": "am",
+    "bengali": "bn",
+    "burmese": "my",
+    "chinese": "zh-CN",
+    "swahili": "sw",
+    "bulgarian": "bg",
+    "thai": "th",
+    "urdu": "ur",
+    "turkish": "tr",
+    "spanish": "es",
+    "chinese": "zh",
+    "greek": "el",
+    "german": "de"
+}
+NUMBER_TO_TAG = {0: "entailment", 1: "neutral", 2: "contradiction"}
+PARAMS = NewType("PARAMS", Dict[str, Any])
+def gemini_completion(prompt):
+    # Define the endpoint URL
+    genai.configure(api_key="AIzaSyBnghQNoOS2qiacHjqutK1RpPV5y-gv7Pg")
+    model = genai.GenerativeModel("models/gemini-1.0-pro-latest")
+    return model.generate_content(prompt).text
+def gpt3x_completion(
+    prompt: Union[str, List[Dict[str, str]]],
+    model: str = "chatgpt",
+    # run_details: Any = {},
+    # num_evals_per_sec: int = 2,
+    # **model_params,
+) -> str:
+    import os
+    import openai
+    os.environ["OPENAI_API_KEY"] = ''
+    def get_entities_chatGPT(final_prompt):
+        response = openai.ChatCompletion.create(
+            engine="gpt35-16k",
+            temperature=0,
+            messages=[
+                {"role": "user", "content": final_prompt}
+            ]
+        )
+        return response['choices'][0]['message']['content']
+    return get_entities_chatGPT(final_prompt=prompt)
+def mixtral_completion(prompt):
+    url = "https://api.together.xyz/v1/chat/completions"
+    # Define your Together API key
+    together_api_key = ""  # Replace with your actual API key
+    # Define the request payload
+    payload = {
+        "temperature": 0,
+        "max_tokens": 30,
+        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "messages": [{"role": "user", "content": f"{prompt}"}],
+    }
+    # Define request headers
+    headers = {
+        "Authorization": f"Bearer {together_api_key}",
+        "Content-Type": "application/json",
+    }
+    # Send POST request
+    response = requests.post(url, json=payload, headers=headers)
+    # Check response status
+    if response.status_code == 200:
+        # Print the response content (API output)
+        return response.json()["choices"][0]["message"]["content"]
+    else:
+        # Print error message if request fails
+        print(f"Error: {response.status_code} - {response.text}")
+def read_parameters(args_path) -> PARAMS:
+    with open(args_path) as f:
+        args = yaml.load(f, Loader=SafeLoader)
+    return args
+def get_key(key_path):
+    with open(key_path) as f:
+        key = f.read().split("\n")[0]
+    return key
+def _translate_example(
+    example: Dict[str, str], src_language: str, target_language: str
+):
+    translator = EasyGoogleTranslate(
+        source_language=LANGUAGE_TO_SUFFIX[src_language],
+        target_language=LANGUAGE_TO_SUFFIX[target_language],
+        timeout=30,
+    )
+    try:
+        return {
+            "premise": translator.translate(example["premise"]),
+            "hypothesis": translator.translate(example["hypothesis"]),
+            "label": "",
+        }
+    except Exception as e:
+        print(e)
+def choose_few_shot_examples(
+    train_dataset: Dataset,
+    few_shot_size: int,
+    context: List[str],
+    selection_criteria: str,
+    lang: str,
+) -> List[Dict[str, Union[str, int]]]:
+    """Selects few-shot examples from training datasets
+    Args:
+        train_dataset (Dataset): Training Dataset
+        few_shot_size (int): Number of few-shot examples
+        selection_criteria (few_shot_selection): How to select few-shot examples. Choices: [random, first_k]
+    Returns:
+        List[Dict[str, Union[str, int]]]: Selected examples
+    """
+    selected_examples = []
+    example_idxs = []
+    if selection_criteria == "first_k":
+        example_idxs = list(range(few_shot_size))
+    elif selection_criteria == "random":
+        example_idxs = (
+            np.random.choice(len(train_dataset), size=few_shot_size, replace=True)
+            .astype(int)
+            .tolist()
+        )
+    ic_examples = [train_dataset[idx] for idx in example_idxs]
+    ic_examples = [
+        {
+            "premise": example["premise"],
+            "hypothesis": example["hypothesis"],
+            "label": NUMBER_TO_TAG[example["label"]],
+        }
+        for example in ic_examples
+    ]
+    for idx, ic_language in enumerate(context):
+        (
+            selected_examples.append(ic_examples[idx])
+            if ic_language == lang
+            else (
+                selected_examples.append(
+                    _translate_example(
+                        example=ic_examples[idx],
+                        src_language=lang,
+                        target_language=ic_language,
+                    )
+                )
+            )
+        )
+    return selected_examples
+def load_xnli_dataset(
+    dataset_name: str,
+    lang: str,
+    split: str,
+    limit: int = 200,
+) -> Union[Dataset, DatasetDict]:
+    """
+    Args:
+        lang (str): Language for which xnli dataset is to be loaded
+        split (str): Train test of validation split of the model to load
+        dataset_frac (float): Fraction of examples to load. Defaults to 1.0
+    Returns:
+        Union[Dataset, DatasetDict]: huggingface dataset object
+    """
+    if dataset_name == "indicxnli":  ##PJ:To add except hindi
+        dataset = load_dataset("Divyanshu/indicxnli", LANGUAGE_TO_SUFFIX[lang])[split]
+    else:
+        dataset = load_dataset("xnli", LANGUAGE_TO_SUFFIX[lang])[split]
+    return dataset.select(np.arange(limit))
+def construct_prompt(
+    instruction: str, test_example: dict, ic_examples: List[dict], zero_shot: bool
+):
+    example_prompt = PromptTemplate(
+        input_variables=["premise", "hypothesis", "label"],
+        template="Premise: {premise}\n Hypothesis: {hypothesis} \n Label{label}",
+    )
+    zero_shot_template = (
+        f"""{instruction}""" + "\n hypothesis: {hypothesis} + \n  Premise: {premise}" ""
+    )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="Premise: {premise} \n Hypothesis: {hypothesis}",
+            input_variables=["hypothesis", "premise"],
+        )
+        if not zero_shot
+        else PromptTemplate(
+            input_variables=["hypothesis", "premise"], template=zero_shot_template
+        )
+    )
+    return (
+        prompt.format(
+            hypothesis=test_example["hypothesis"], premise=test_example["premise"]
+        ),
+        test_example["label"],
+    )
+def dump_metrics(
+    lang: str,
+    config: Dict[str, str],
+    r1: float,
+    r2: float,
+    rL: float,
+    metric_logger_path: str,
+):
+    # Check if the metric logger file exists
+    file_exists = os.path.exists(metric_logger_path)
+    # Open the CSV file in append mode
+    with open(metric_logger_path, "a", newline="") as f:
+        csvwriter = csv.writer(f, delimiter=",")
+        # Write header row if the file is newly created
+        if not file_exists:
+            header = [
+                "Language",
+                "Prefix",
+                "Input",
+                "Context",
+                "Output",
+                "R1",
+                "R2",
+                "RL",
+            ]
+            csvwriter.writerow(header)
+        csvwriter.writerow(
+            [
+                lang,
+                config["prefix"],
+                config["input"],
+                config["context"][0],
+                config["output"],
+                r1,
+                r2,
+                rL,
+            ]
+        )
+def dump_predictions(idx, response, label, response_logger_file):
+    obj = {"q_idx": idx, "prediction": response, "label": label}
+    with open(response_logger_file, "a") as f:
+        f.write(json.dumps(obj, ensure_ascii=False) + "\n")
+def compute_rouge(scorer, pred, label):
+    score = scorer.score(pred, label)
+    return score["rouge1"], score["rouge2"], score["rougeL"]
+def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    translator = EasyGoogleTranslate(
+        source_language="en",
+        target_language=LANGUAGE_TO_SUFFIX[target_language],
+        timeout=10,
+    )
+    return translator.translate(basic_instruction)
+def _translate_prediction_to_output_language(
+    prediction: str, prediction_language: str, output_language: str
+) -> str:
+    translator = EasyGoogleTranslate(
+        source_language=LANGUAGE_TO_SUFFIX[prediction_language],
+        target_language=LANGUAGE_TO_SUFFIX[output_language],
+        timeout=10,
+    )
+    return translator.translate(prediction)
+def create_instruction(lang: str):
+    basic_instruction = f"""
+        You are an NLP assistant whose purpose is to solve Natural Language Inference (NLI) problems.
+        NLI is the task of determining the inference relation between two texts: entailment,
+        contradiction, or neutral.
+        Your answer should be one word of the following - entailment, contradiction, or neutral.
+        Pay attention: The output should be only one word!!!!
+        """
+    return (
+        basic_instruction
+        if lang == "english"
+        else _translate_instruction(basic_instruction, target_language=lang)
+    )
+def run_one_configuration(params: Optional[PARAMS] = None, zero: bool= False):
+    if not params:
+        params = read_parameters("../../parameters.yaml")
+    lang = params["selected_language"]
+    config = params["config"]
+    zero_shot = len(config["context"]) == 0
+    if not zero:
+        config_header = f"{config['input']}_{config['prefix']}_{config['context'][0]}"
+    else:
+        config_header = f"{config['input']}_{config['prefix']}_zero"
+    test_data = load_xnli_dataset(
+        dataset_name=params["dataset_name"],
+        lang=lang,
+        split="test",
+        limit=params["limit"],
+    )
+    pool = mp.Pool(processes=3)
+    # Iterate over test_data using tqdm for progress tracking
+    for idx, test_example in tqdm(enumerate(test_data), total=len(test_data)):
+        # Apply asynchronous processing of each test example
+        pool.apply_async(
+            process_test_example,
+            args=(
+                test_data,
+                config_header,
+                idx,
+                test_example,
+                config,
+                zero_shot,
+                lang,
+                params,
+            ),
+        )
+    # Close the pool and wait for all processes to finish
+    pool.close()
+    pool.join()
+def process_test_example(
+    test_data, config_header, idx, test_example, config, zero_shot, lang, params
+):
+    try:
+        instruction = create_instruction(lang=config["prefix"])
+        text_example = {
+            "premise": test_example["premise"],
+            "hypothesis": test_example["hypothesis"],
+            "label": test_example["label"],
+        }
+        ic_examples = []
+        if not zero_shot:
+            ic_examples = choose_few_shot_examples(
+                train_dataset=test_data,
+                few_shot_size=len(config["context"]),
+                context=config["context"],
+                selection_criteria="random",
+                lang=params["selected_language"],
+            )
+        prompt, label = construct_prompt(
+            instruction=instruction,
+            test_example=text_example,
+            ic_examples=ic_examples,
+            zero_shot=zero_shot,
+        )
+        pred = get_prediction(prompt=prompt, endpoint_id=7327255438662041600, project_id=16514800572)
+        print(pred)
+        os.makedirs(
+            f"{params['response_logger_root']}/{params['model']}/{lang}", exist_ok=True
+        )
+        dump_predictions(
+            idx=idx,
+            response=pred,
+            label=label,
+            response_logger_file=f"{params['response_logger_root']}/{params['model']}/{lang}/{config_header}.csv",
+        )
+    except Exception as e:
+        # Handle exceptions here
+        print(f"Error processing example {idx}: {e}")
+def construct_prompt(
+    instruction: str,
+    test_example: dict,
+    zero_shot: bool,
+    num_examples: int,
+    lang: str,
+    config: Dict[str, str],
+    dataset_name: str = 'xnli'
+):
+    if not instruction:
+        print(lang)
+        instruction = create_instruction(lang)
+    example_prompt = PromptTemplate(
+        input_variables=["premise", "hypothesis", "label"],
+        template="Premise {premise}\n Hypothesis {hypothesis} \n{label}",
+    )
+    zero_shot_template = (
+        f"""{instruction}""" + "\n Hypothesis: {hypothesis} + \n  Premise: {premise}" ""
+    )
+    test_data = load_xnli_dataset(dataset_name, lang, split="test", limit=100)
+    print(test_data)
+    print(num_examples)
+    print(lang)
+    ic_examples = []
+    if not zero_shot:
+        ic_examples = choose_few_shot_examples(
+            train_dataset=test_data,
+            few_shot_size=num_examples,
+            context=[config["context"]] * num_examples,
+            selection_criteria="random",
+            lang=lang,
+        )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="{premise} \n{hypothesis}",
+            input_variables=["hypothesis", "premise"],
+        )
+        if not zero_shot
+        else PromptTemplate(
+            input_variables=["hypothesis", "premise"], template=zero_shot_template
+        )
+    )
+    print("lang", lang)
+    print(config["input"] , lang)
+    if config["input"] != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=config["input"]
+        )
+    return prompt.format(
+        hypothesis=test_example["hypothesis"], premise=test_example["premise"]
+    )

tasks/qa.py ADDED Viewed

	@@ -0,0 +1,770 @@

+import csv
+import json
+import logging
+import multiprocessing as mp
+import os
+import subprocess
+import re
+import string
+import sys
+import subprocess
+import time
+import unicodedata
+from typing import Any, Dict, List, NewType, Optional, Union
+import numpy as np
+import openai
+import requests
+import yaml
+from datasets import Dataset, load_dataset
+from easygoogletranslate import EasyGoogleTranslate
+from evaluate import load
+from langchain.prompts import FewShotPromptTemplate, PromptTemplate
+from tqdm import tqdm
+from yaml.loader import SafeLoader
+# from models.model_completion import gpt3x_completion, gemini_completion
+def gemini_completion(prompt):
+    # Define the endpoint URL
+    genai.configure(api_key="")
+    model = genai.GenerativeModel("models/gemini-1.0-pro-latest")
+    return model.generate_content(prompt).text
+# checkpoint = "bigscience/mt0-base"
+# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+#
+# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+# model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
+# model.to("cuda:04")
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+def get_entities_gpt3_long(prompt):
+    response = openai.ChatCompletion.create(
+        engine="chatgpt", temperature=0, messages=[{"role": "user", "content": prompt}]
+    )
+    return response["choices"][0]["message"]["content"]
+def gpt3x_completion(
+        prompt: Union[str, List[Dict[str, str]]],
+        model: str = "chatgpt",
+        # run_details: Any = {},
+        # num_evals_per_sec: int = 2,
+        # **model_params,
+) -> str:
+    import os
+    import openai
+    os.environ["OPENAI_API_KEY"] = ''
+    openai.api_type = "azure"
+    def get_entities_chatGPT(final_prompt):
+        response = openai.ChatCompletion.create(
+            engine="gpt35-16k",
+            temperature=0,
+            messages=[
+                {"role": "user", "content": final_prompt}
+            ]
+        )
+        return response['choices'][0]['message']['content']
+    return get_entities_chatGPT(final_prompt=prompt)
+def mt0_completion(prompt):
+    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(inputs)
+    return tokenizer.decode(outputs[0])
+def mixtral_completion(prompt):
+    url = "https://api.together.xyz/v1/chat/completions"
+    # Define your Together API key
+    together_api_key = ""  # Replace with your actual API key
+    # Define the request payload
+    payload = {
+        "temperature": 0,
+        "max_tokens": 30,
+        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "messages": [{"role": "user", "content": f"{prompt}"}],
+    }
+    # Define request headers
+    headers = {
+        "Authorization": f"Bearer {together_api_key}",
+        "Content-Type": "application/json",
+    }
+    # Send POST request
+    response = requests.post(url, json=payload, headers=headers)
+    # Check response status
+    if response.status_code == 200:
+        # Print the response content (API output)
+        return response.json()["choices"][0]["message"]["content"]
+    else:
+        # Print error message if request fails
+        print(f"Error: {response.status_code} - {response.text}")
+XQUAD_LANG2CODES = {
+    "bengali": "bn",
+    "korean": "ko",
+    "swahili": "sw",
+    "english": "en",
+    "indonesian": "id",
+    "arabic": "ar",
+    "finnish": "fi",
+    "telugu": "te",
+    "russian": "ru",
+    "german": "de",
+    "greek": "el",
+    "hindi": "hi",
+    "vietnamese": "vi",
+    "romanian": "ro",
+}
+INDICQA_LANG2CODES = {
+    "indicqa": "as",
+    "bengali": "bn",
+    "gujarati": "gu",
+    "hindi": "hi",
+    "kannada": "kn",
+    "malayalam": "ml",
+    "marathi": "mr",
+    "odia": "or",
+    "punjabi": "pa",
+    "tamil": "ta",
+    "telugu": "te",
+    "assamese": "as",
+}
+PUNCT = {
+    chr(i)
+    for i in range(sys.maxunicode)
+    if unicodedata.category(chr(i)).startswith("P")
+}.union(string.punctuation)
+WHITESPACE_LANGS = ["en", "es", "hi", "vi", "de", "ar"]
+MIXED_SEGMENTATION_LANGS = ["zh"]
+TYDIQA_LANG2CODES = {
+    "bengali": "bn",
+    "korean": "ko",
+    "swahili": "sw",
+    "english": "en",
+    "indonesian": "id",
+    "arabic": "ar",
+    "finnish": "fi",
+    "telugu": "te",
+    "russian": "ru",
+    "assamese": "as",
+    "persian": "fa",
+}
+logger = logging.Logger("Xlsum_task")
+LANGUAGE_TO_SUFFIX = {
+    "chinese_simplified": "zh-CN",
+    "french": "fr",
+    "portuguese": "pt",
+    "english": "en",
+    "arabic": "ar",
+    "hindi": "hi",
+    "indonesian": "id",
+    "amharic": "am",
+    "bengali": "bn",
+    "telugu": "te",
+    "burmese": "my",
+    "german": "de",
+    "greek": "el",
+    "tamil": "ta",
+    "assamese": "as",
+    "hindi": "hi",
+    "vietnamese": "vi",
+    "russian": "ru",
+    "telugu": "te",
+    "romanian": "ro",
+    "malayalam": "ml",
+    "persian": "fa",
+}
+PARAMS = NewType("PARAMS", Dict[str, Any])
+def read_parameters(args_path) -> PARAMS:
+    with open(args_path) as f:
+        args = yaml.load(f, Loader=SafeLoader)
+    return args
+def load_qa_dataset(dataset_name, lang, split, translate_test=False, limit=5):
+    if dataset_name == "indicqa":
+        if split != "train":
+            dataset = load_dataset(
+                "ai4bharat/IndicQA", f"indicqa.{INDICQA_LANG2CODES[lang]}"
+            )[split]
+        else:
+            dataset = load_dataset("squad_v2")[split]
+    elif dataset_name == "xquad":
+        if split != "train":
+            dataset = load_dataset("xquad", f"xquad.{XQUAD_LANG2CODES[lang]}")[
+                "validation"
+            ]
+        else:
+            dataset = load_dataset("squad")[split]
+    elif dataset_name == "tydiqa":
+        dataset = load_dataset("tydiqa", "secondary_task")[split]
+        dataset = dataset.map(
+            lambda example: {"lang": TYDIQA_LANG2CODES[example["id"].split("-")[0]]}
+        )
+        dataset = dataset.filter(lambda example: example["lang"] == lang)
+    elif dataset_name == "mlqa":
+        if split == "train":
+            print("No Training Data for MLQA, switching to validation!")
+            split = "validation"
+        if translate_test:
+            dataset_name = f"mlqa-translate-test.{lang}"
+        else:
+            dataset_name = f"mlqa.{lang}.{lang}"
+        dataset = load_dataset("mlqa", dataset_name)[split]
+    else:
+        raise NotImplementedError()
+    return dataset.select(np.arange(limit))
+def construct_prompt(
+        instruction: str,
+        test_example: dict,
+        ic_examples: List[dict],
+        zero_shot: bool,
+        lang: str,
+        config: Any,
+):
+    example_prompt = PromptTemplate(
+        input_variables=["context", "question", "answers"],
+        template="Context: {context}  \n  Question: {question} \n " "Answers: {answers}",
+    )
+    zero_shot_template = (
+            f"""{instruction}""" + " \n <Context>: {context}  \n <Question>: {question} " ""
+    )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="<Context>: {context}  \n <Question>: {question}  \n Answers: ?",
+            input_variables=["question", "context"],
+        )
+        if not zero_shot
+        else PromptTemplate(
+            input_variables=["question", "context"], template=zero_shot_template
+        )
+    )
+    label = test_example["answers"]
+    if config["input"] != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=config["input"]
+        )
+    return (
+        prompt.format(
+            question=test_example["question"], context=test_example["context"]
+        ),
+        label,
+    )
+def dump_metrics(
+        lang: str, config: Dict[str, str], f1: float, em: float, metric_logger_path: str
+):
+    # Check if the metric logger file exists
+    file_exists = os.path.exists(metric_logger_path)
+    # Open the CSV file in append mode
+    with open(metric_logger_path, "a", newline="") as f:
+        csvwriter = csv.writer(f, delimiter=",")
+        # Write header row if the file is newly created
+        if not file_exists:
+            header = ["Language", "Prefix", "Input", "Context", "Output", "F1", "Em"]
+            csvwriter.writerow(header)
+        csvwriter.writerow(
+            [
+                lang,
+                config["prefix"],
+                config["input"],
+                config["context"][0],
+                config["output"],
+                f1,
+                em,
+            ]
+        )
+def dump_predictions(idx, response, label, response_logger_file):
+    obj = {"q_idx": idx, "prediction": response, "label": label}
+    with open(response_logger_file, "a") as f:
+        f.write(json.dumps(obj, ensure_ascii=False) + " \n ")
+def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    translator = EasyGoogleTranslate(
+        source_language="en",
+        target_language=LANGUAGE_TO_SUFFIX[target_language],
+        timeout=50,
+    )
+    return translator.translate(basic_instruction)
+def _translate_prediction_to_output_language(
+        prediction: str, prediction_language: str, output_language: str
+) -> str:
+    translator = EasyGoogleTranslate(
+        source_language=LANGUAGE_TO_SUFFIX[prediction_language],
+        target_language=LANGUAGE_TO_SUFFIX[output_language],
+        timeout=10,
+    )
+    return translator.translate(prediction)
+def create_instruction(lang: str, expected_output: str):
+    basic_instruction = (
+        "Answer to the <Question> below, based only to the given <Context>, Follow these instructions: \n "
+        "1. The answer should include only words from the given context \n "
+        "2. The answer must include up to 5 words \n "
+        "3. The answer Should be the shortest as possible \n "
+        f"4. The answer must be in {expected_output} only!, not another language!!!"
+    )
+    return (
+        basic_instruction
+        if expected_output == "english"
+        else _translate_instruction(basic_instruction, target_language=lang)
+    )
+def _translate_example(
+        example: Dict[str, str], src_language: str, target_language: str
+):
+    translator = EasyGoogleTranslate(
+        source_language=LANGUAGE_TO_SUFFIX[src_language],
+        target_language=LANGUAGE_TO_SUFFIX[target_language],
+        timeout=30,
+    )
+    try:
+        return {
+            "question": translator.translate(example["question"]),
+            "context": translator.translate(example["context"][:2000])
+                       + translator.translate(example["context"][2000:4000])
+                       + translator.translate(example["context"][4000:6000]),
+            "answers": "",
+        }
+    except Exception as e:
+        pass
+def choose_few_shot_examples(
+        train_dataset: Dataset,
+        few_shot_size: int,
+        context: List[str],
+        selection_criteria: str,
+        lang: str,
+) -> List[Dict[str, Union[str, int]]]:
+    """Selects few-shot examples from training datasets
+    Args:
+        train_dataset (Dataset): Training Dataset
+        few_shot_size (int): Number of few-shot examples
+        selection_criteria (few_shot_selection): How to select few-shot examples. Choices: [random, first_k]
+    Returns:
+        List[Dict[str, Union[str, int]]]: Selected examples
+    """
+    selected_examples = []
+    example_idxs = []
+    if selection_criteria == "first_k":
+        example_idxs = list(range(few_shot_size))
+    elif selection_criteria == "random":
+        example_idxs = (
+            np.random.choice(len(train_dataset), size=few_shot_size, replace=True)
+            .astype(int)
+            .tolist()
+        )
+    ic_examples = [
+        {
+            "question": train_dataset[idx]["question"],
+            "context": train_dataset[idx]["context"],
+            "answers": train_dataset[idx]["answers"]["text"],
+        }
+        for idx in example_idxs
+    ]
+    for idx, ic_language in enumerate(context):
+        (
+            selected_examples.append(ic_examples[idx])
+            if ic_language == lang
+            else (
+                selected_examples.append(
+                    _translate_example(
+                        example=ic_examples[idx],
+                        src_language=lang,
+                        target_language=ic_language,
+                    )
+                )
+            )
+        )
+    return selected_examples
+def normalize_answer(s):
+    """Lower text and remove punctuation, articles and extra whitespace."""
+    def remove_articles(text):
+        return re.sub(r"\b(a|an|the)\b", " ", text)
+    def white_space_fix(text):
+        return " ".join(text.split())
+    def remove_punc(text):
+        exclude = set(PUNCT)  # set(string.punctuation)
+        return "".join(ch for ch in text if ch not in exclude)
+    def lower(text):
+        return text.lower()
+    return white_space_fix(remove_articles(remove_punc(lower(s))))
+def process_test_example(
+        test_data, config_header, idx, test_example, config, zero_shot, lang, params
+):
+    try:
+        # Your existing code for processing each test example
+        instruction = create_instruction(
+            lang=config["prefix"], expected_output=config["output"]
+        )
+        text_example = {
+            "question": test_example["question"],
+            "context": test_example["context"],
+            "answers": test_example["answers"]["text"],
+        }
+        ic_examples = []
+        if not zero_shot:
+            ic_examples = choose_few_shot_examples(
+                train_dataset=test_data,
+                few_shot_size=len(config["context"]),
+                context=config["context"],
+                selection_criteria="random",
+                lang=params["selected_language"],
+            )
+        prompt, label = construct_prompt(
+            instruction=instruction,
+            test_example=text_example,
+            ic_examples=ic_examples,
+            zero_shot=zero_shot,
+            lang=lang,
+            config=config,
+        )
+        print(len(prompt))
+        pred = get_prediction(prompt=prompt, endpoint_id=7327255438662041600, project_id=16514800572)
+        # pred = mixtral_completion(prompt)
+        print(pred)
+        logger.info("Saving prediction to persistent volume")
+        os.makedirs(
+            f"{params['response_logger_root']}/{params['model']}/{lang}", exist_ok=True
+        )
+        dump_predictions(
+            idx=idx,
+            response=pred,
+            label=label,
+            response_logger_file=f"{params['response_logger_root']}/{params['model']}/{lang}/{config_header}.csv",
+        )
+    except Exception as e:
+        # Handle exceptions here
+        print(f"Error processing example {idx}: {e}")
+def run_one_configuration(params: Optional[PARAMS] = None):
+    if not params:
+        params = read_parameters("../../parameters.yaml")
+    lang = params["selected_language"]
+    config = params["config"]
+    zero_shot = len(config["context"]) == 0
+    rouge1, rouge2, rougeL, normalized_ic_examples, batched_predictions = (
+        [],
+        [],
+        [],
+        [],
+        [],
+    )
+    config_header = f"{config['input']}_{config['prefix']}_{config['context'][0]}_{config['output']}"
+    dataset_name = params["dataset_name"]
+    squad_metric = load("squad")
+    metric = params["metric"]
+    f1_sum = 0
+    em_sum = 0
+    avg_em = 0
+    avg_f1 = 0
+    preds = []
+    labels = []
+    f1s, ems = [], []
+    test_data = load_qa_dataset(
+        dataset_name=params["dataset_name"],
+        lang=lang,
+        split="validation" if params["dataset_name"] == "xquad" else "test",
+        limit=params["limit"],
+    )
+    for idx, test_example in (pbar := tqdm(enumerate(test_data))):
+        try:
+            instruction = create_instruction(
+                lang=config["prefix"], expected_output=config["output"]
+            )
+            text_example = {
+                "question": test_example["question"],
+                "context": test_example["context"],
+                "answers": test_example["answers"]["text"],
+            }
+            ic_examples = []
+            if not zero_shot:
+                ic_examples = choose_few_shot_examples(
+                    train_dataset=test_data,
+                    few_shot_size=len(config["context"]),
+                    context=config["context"],
+                    selection_criteria="random",
+                    lang=params["selected_language"],
+                )
+            prompt, label = construct_prompt(
+                instruction=instruction,
+                test_example=text_example,
+                ic_examples=ic_examples,
+                zero_shot=zero_shot,
+                lang=lang,
+                config=config,
+            )
+            pred = mt0_completion(prompt=prompt)
+            print(pred)
+            logger.info("Saving prediction to persistent volume")
+            os.makedirs(
+                f"{params['response_logger_root']}" + f"{params['model']}" + f"/{lang}",
+                exist_ok=True,
+            )
+            dump_predictions(
+                idx=idx,
+                response=pred,
+                label=label,
+                response_logger_file=f"{params['response_logger_root']}"
+                                     + f"/{params['model']}"
+                                     + f"/{lang}/"
+                                     + config_header
+                                     + ".csv",
+            )
+            #
+            # normalized_prediction = normalize_answer(pred)
+            # batched_predictions.append(normalized_prediction)
+            #
+            # if config["output"] != params["selected_language"]:
+            #     pred = _translate_prediction_to_output_language(
+            #         prediction=normalized_prediction,
+            #         prediction_language=config["output"],
+            #         output_language=params["selected_language"],
+            #     )
+            #     print(
+            #         f"Translated the prediciton from {config['output']} to {params['selected_language']}"
+            #     )
+            #
+            # logger.info("Starting evaluation")
+            #
+            # if dataset_name == "xquad":
+            #     prediction = {"prediction_text": pred, "id": test_example["id"]}
+            #
+            # reference = {}
+            # reference["answers"] = test_example["answers"]
+            # reference["id"] = test_example["id"]
+            # if reference["answers"]["text"][0] == "":
+            #     reference["answers"]["text"] = []
+            #     reference["answers"]["answer_start"] = []
+            #
+            # if params["metric"] == "squad":
+            #     results = squad_metric.compute(
+            #         predictions=[prediction], references=[reference]
+            #     )
+            # else:
+            #     results = squad_metric.compute(
+            #         predictions=[prediction],
+            #         references=[reference],
+            #         no_answer_threshold=0.9,
+            #     )
+            #
+            # f1_sum += results["f1"]
+            # if metric == "squad":
+            #     em_sum += results["exact_match"]
+            # else:
+            #     em_sum += results["exact"]
+            # avg_f1 = f1_sum / (idx + 1)
+            # avg_em = em_sum / (idx + 1)
+            #
+            # preds.append(prediction)
+            # labels.append(reference)
+            # f1s.append(results["f1"])
+            # if metric == "squad":
+            #     ems.append(results["exact_match"])
+            # else:
+            #     ems.append(results["exact"])
+        except Exception as e:
+            print(f"Found an exception {e}, continue to the next example")
+            continue
+    os.makedirs(f"{params['metrics_root']}" + f"/{params['model']}", exist_ok=True)
+    dump_metrics(
+        lang,
+        config,
+        avg_f1,
+        avg_em,
+        f"{params['metrics_root']}" + f"/{params['model']}" + f"/{lang}.csv",
+    )
+# if __name__ == "__main__":
+#     run_one_configuration()
+def run_one_configuration_paralle(params: Optional[PARAMS] = None, zero: bool = False):
+    if not params:
+        params = read_parameters("../../parameters.yaml")
+    lang = params["selected_language"]
+    config = params["config"]
+    zero_shot = len(config["context"]) == 0
+    rouge1, rouge2, rougeL, normalized_ic_examples, batched_predictions = (
+        [],
+        [],
+        [],
+        [],
+        [],
+    )
+    if not zero:
+        config_header = f"{config['input']}_{config['prefix']}_{config['context'][0]}_{config['output']}"
+    else:
+        config_header = f"{config['input']}_{config['prefix']}_zero_{config['output']}"
+    test_data = load_qa_dataset(
+        dataset_name=params["dataset_name"],
+        lang=lang,
+        split="validation" if params["dataset_name"] == "xquad" else "test",
+        limit=params["limit"],
+    )
+    # Initialize multiprocessing poosl
+    num_processes = mp.cpu_count()  # Use number of available CPU cores
+    pool = mp.Pool(processes=10)
+    # Iterate over test_data using tqdm for progress tracking
+    for idx, test_example in tqdm(enumerate(test_data), total=len(test_data)):
+        # Apply asynchronous processing of each test example
+        pool.apply_async(
+            process_test_example,
+            args=(
+                test_data,
+                config_header,
+                idx,
+                test_example,
+                config,
+                zero_shot,
+                lang,
+                params,
+            ),
+        )
+    # Close the pool and wait for all processes to finish
+    pool.close()
+    pool.join()
+def construct_prompt(
+    instruction: str,
+    test_example: dict,
+    zero_shot: bool,
+    num_examples: int,
+    lang: str,
+    config: Dict[str, str],
+    dataset_name: str = 'xquad'
+):
+    if not instruction:
+        instruction = create_instruction(lang, config['prefix'])
+    example_prompt = PromptTemplate(
+        input_variables=["context", "question", "answers"],
+        template="Context: {context} \n Question: {question} \n " "Answers: {answers}",
+    )
+    zero_shot_template = (
+            f"""{instruction}""" + " \n <Context>: {context}  \n <Question>: {question} " ""
+    )
+    test_data = load_qa_dataset(dataset_name = dataset_name, lang=lang, split="test", limit=100)
+    print(test_data)
+    print(num_examples)
+    print(lang)
+    ic_examples = []
+    if not zero_shot:
+        ic_examples = choose_few_shot_examples(
+            train_dataset=test_data,
+            few_shot_size=num_examples,
+            context=[config["context"]] * num_examples,
+            selection_criteria="random",
+            lang=lang,
+        )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="<Context>: {context}  \n <Question>: {question}  \n Answers: ?",
+            input_variables=["question", "context"],
+        )
+        if not zero_shot
+        else PromptTemplate(
+            input_variables=["question", "context"], template=zero_shot_template
+        )
+    )
+    print("lang", lang)
+    print(config["input"] , lang)
+    if config["input"] != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=config["input"]
+        )
+    return prompt.format(
+            question=test_example["question"], context=test_example["context"]
+        )

tasks/summarization.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from typing import List, Dict, Optional, Union
+import numpy as np
+from datasets import Dataset, load_dataset
+from easygoogletranslate import EasyGoogleTranslate
+from langchain.prompts import PromptTemplate, FewShotPromptTemplate
+# Maps the language names used by this module to the language codes handed to
+# EasyGoogleTranslate as source_language / target_language.
+LANGUAGE_TO_SUFFIX = {
+    "chinese_simplified": "zh-CN",
+    "french": "fr",
+    "portuguese": "pt",
+    "english": "en",
+    "arabic": "ar",
+    "hindi": "hi",
+    "indonesian": "id",
+    "amharic": "am",
+    "bengali": "bn",
+    "burmese": "my",
+    "uzbek": "uz",
+    "nepali": "ne",
+    "japanese": "ja",
+    "spanish": "es",
+    "turkish": "tr",
+    "persian": "fa",
+    "azerbaijani": "az",
+    "korean": "ko",
+}
+def choose_few_shot_examples(
+        train_dataset: Dataset, few_shot_size: int, context: List[str], selection_criteria: str, lang: str,
+) -> List[Dict[str, Union[str, int]]]:
+    selected_examples = []
+    example_idxs = []
+    if selection_criteria == "first_k":
+        example_idxs = list(range(few_shot_size))
+    elif selection_criteria == "random":
+        example_idxs = (
+            np.random.choice(len(train_dataset), size=few_shot_size, replace=True)
+            .astype(int)
+            .tolist()
+        )
+    ic_examples = [{'text': train_dataset[idx]['text'], 'summary': train_dataset[idx]['summary']} for idx in
+                   example_idxs]
+    for idx, ic_language in enumerate(context):
+        selected_examples.append(ic_examples[idx]) if ic_language == lang else (
+            selected_examples.append(
+                _translate_example(example=ic_examples[idx], src_language=lang, target_language=ic_language)))
+    return selected_examples
+def _translate_instruction(basic_instruction: str, target_language: str) -> str:
+    translator = EasyGoogleTranslate(
+        source_language="en",
+        target_language=LANGUAGE_TO_SUFFIX[target_language],
+        timeout=50,
+    )
+    return translator.translate(basic_instruction)
+def _translate_example(example: Dict[str, str], src_language: str, target_language: str):
+    translator = EasyGoogleTranslate(source_language=LANGUAGE_TO_SUFFIX[src_language],
+                                     target_language=LANGUAGE_TO_SUFFIX[target_language],
+                                     timeout=30)
+    try:
+        return {'text': translator.translate(example['text']), 'summary': ''}
+    except Exception as e:
+        print(e)
+def create_instruction(lang: str, expected_output: str):
+    basic_instruction = (
+        f"Write a summary of the given <Text> \n The output should be in {expected_output} "
+        f"\n The output must be up to 2 sentences maximum!!!"
+    )
+    print(lang)
+    return (
+        basic_instruction
+        if expected_output == "english"
+        else _translate_instruction(basic_instruction, target_language=lang)
+    )
+def load_xlsum_data(lang, split, limit = 5):
+    """Loads the xlsum dataset"""
+    dataset = load_dataset("csebuetnlp/xlsum", lang)[split]
+    return dataset.select(range(limit))
+def construct_prompt(
+    instruction: str,
+    test_example: dict,
+    zero_shot: bool,
+    dataset: str,
+    num_examples: int,
+    lang: str,
+    config: Dict[str, str],
+):
+    if not instruction:
+        print(lang)
+        instruction = create_instruction(lang, config['prefix'])
+    example_prompt = PromptTemplate(
+        input_variables=["summary", "text"], template="Text: {text}\nSummary: {summary}"
+    )
+    zero_shot_template = f"""{instruction}""" + "\n Input: {text} " ""
+    test_data = load_xlsum_data(lang=lang, split="test", limit=100)
+    print(test_data)
+    print(num_examples)
+    print(lang)
+    ic_examples = []
+    if not zero_shot:
+        ic_examples = choose_few_shot_examples(
+            train_dataset=test_data,
+            few_shot_size=num_examples,
+            context=[config["context"]] * num_examples,
+            selection_criteria="random",
+            lang=lang,
+        )
+    prompt = (
+        FewShotPromptTemplate(
+            examples=ic_examples,
+            prefix=instruction,
+            example_prompt=example_prompt,
+            suffix="<Text>: {text}",
+            input_variables=["text"],
+        )
+        if not zero_shot
+        else PromptTemplate(input_variables=["text"], template=zero_shot_template)
+    )
+    print("lang", lang)
+    print(config["input"] , lang)
+    if config["input"] != lang:
+        test_example = _translate_example(
+            example=test_example, src_language=lang, target_language=config["input"]
+        )
+    print("test_example", prompt)
+    return prompt.format(text=test_example["text"])