File size: 2,779 Bytes
8773ff3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
from typing import Any, Dict, List

from distilabel.steps.tasks.typing import ChatType
from distilabel.steps.tasks.text_generation import TextGeneration
from distilabel.steps import StepInput, StepOutput, Step

from dotenv import load_dotenv

from defaults import (
    DEFAULT_DOMAIN,
    DEFAULT_PERSPECTIVES,
    DEFAULT_TOPICS,
    DEFAULT_EXAMPLES,
    DEFAULT_SYSTEM_PROMPT,
    N_PERSPECTIVES,
    N_TOPICS,
    N_EXAMPLES,
)

# Read a .env file (if one is found) into the process environment.
load_dotenv()

# Application description used for SelfInstruct.
# This text is prompt content, so grammar slips here become part of the
# prompt; keep the wording clean.
APPLICATION_DESCRIPTION = f"""You are an AI assistant that generates queries around the domain of {DEFAULT_DOMAIN}.
You should not expect basic but profound questions from your users.
The queries should reflect a diversity of vision and economic positions and political positions.
The queries may know about different methods of {DEFAULT_DOMAIN}.
The queries can be positioned politically, economically, socially, or practically.
Also take into account the impact of diverse causes on diverse domains."""


# Keep only the leading N_* entries of each default list.
TOPICS = DEFAULT_TOPICS[:N_TOPICS]
PERSPECTIVES = DEFAULT_PERSPECTIVES[:N_PERSPECTIVES]
EXAMPLES = DEFAULT_EXAMPLES[:N_EXAMPLES]


def _escape_braces(text: str) -> str:
    """Double curly braces so ``str.format`` reproduces them literally."""
    return str(text).replace("{", "{{").replace("}", "}}")


def create_examples_template(examples: List[Dict[str, str]]) -> str:
    """Build a ``str.format`` prompt template that embeds example Q/A pairs.

    The result starts with an ``{instruction}`` placeholder, followed by a
    list of all example questions and then a list of all example answers.

    Args:
        examples: Mappings that each provide a ``"question"`` and an
            ``"answer"`` entry.

    Returns:
        A single template string. ``{instruction}`` is its only format
        field; braces that occur inside the example text are escaped so a
        later ``template.format(instruction=...)`` emits them unchanged
        instead of raising or substituting.

    Raises:
        KeyError: If an example lacks the ``"question"`` or ``"answer"`` key.
    """
    question_lines = "".join(
        f"\n- Question: {_escape_braces(example['question'])}\n"
        for example in examples
    )
    answer_lines = "".join(
        f"\n- Answer: {_escape_braces(example['answer'])}\n"
        for example in examples
    )

    # NOTE(review): the duplicated "the the" is kept verbatim so this prefix
    # stays identical to the default DomainExpert._template.
    return (
        "{instruction}\nThis is the the instruction.\n Examples: "
        + " Examples of high quality questions:"
        + question_lines
        + " Examples of high quality answers:"
        + answer_lines
    )


def create_topics(topics: List[str], positions: List[str]) -> List[str]:
    """Pair every topic with every position as a descriptive phrase.

    Args:
        topics: Topic names; they drive the outer ordering of the result.
        positions: Perspectives combined with each topic in turn.

    Returns:
        Strings of the form ``"<topic> from a <position> perspective"``,
        grouped by topic and, within a topic, ordered like ``positions``.
    """
    combined: List[str] = []
    for subject in topics:
        for stance in positions:
            combined.append(f"{subject} from a {stance} perspective")
    return combined


class DomainExpert(TextGeneration):
    """Text-generation task that answers an instruction as a domain expert.

    Each input is turned into a two-message chat: a fixed system prompt
    followed by a user message rendered from a ``str.format`` template.
    """

    # System message content; defaults to the project-wide system prompt.
    _system_prompt: str = DEFAULT_SYSTEM_PROMPT
    # User message template; it is filled with the keys of the input mapping.
    _template: str = """{instruction}\nThis is the the instruction.\n Examples: """

    def format_input(self, input: Dict[str, Any]) -> "ChatType":
        """Render one input mapping as a system + user chat.

        Args:
            input: Values for the template; every key is passed to
                ``str.format`` as a keyword argument.

        Returns:
            A list with the system message first and the user message second.
        """
        system_message = {"role": "system", "content": self._system_prompt}
        user_message = {
            "role": "user",
            "content": self._template.format(**input),
        }
        return [system_message, user_message]


class CleanNumberedList(Step):
    """Step that strips a leading list number from each question."""

    def process(self, inputs: StepInput) -> StepOutput:
        """Remove a leading ``"<digits>. "`` marker from every question.

        Each item's ``"question"`` value is rewritten in place, and the same
        ``inputs`` object is then yielded once.
        """
        import re

        # Anchored at the start of the string: digits, a literal dot, then
        # exactly one whitespace character.
        leading_number = re.compile(r"^\d+\.\s")

        for row in inputs:
            row["question"] = leading_number.sub("", row["question"])
        yield inputs