Elron committed on
Commit
2109a58
1 Parent(s): df3c5b3

Upload formats.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. formats.py +28 -9
formats.py CHANGED
@@ -17,15 +17,17 @@ class SystemFormat(Format):
17
  r"""Generates the whole input to the model, from constant strings that are given as args, and from values found in specified fields of the instance.
18
 
19
  SystemFormat expects the input instance to contain:
20
- 1. A field named "source" whose value is a string verbalizing the original values in the instance (as read
 
21
  from the source dataset), in the context of the underlying task.
22
- 2. A field named "instruction" that contains a (non-None) string.
23
- 3. A field named with the value in arg 'demos_field', containing a list of dicts, each dict with fields "source"
24
  and "target", representing a single demo.
 
25
 
26
  SystemFormat formats the above fields into a single string to be inputted to the model. This string overwrites
27
  field "source" of the instance. Formatting is driven by two args: 'demo_format' and 'model_input_format'.
28
- SystemFormat also pops field "instruction" and the field containing the demos out from the input instance.
29
 
30
  Args:
31
  demos_field (str): the name of the field that contains the demos, being a list of dicts, each with "source" and "target" keys
@@ -67,10 +69,10 @@ class SystemFormat(Format):
67
  """
68
 
69
  demos_field: str = "demos"
70
- demo_format: str = (
71
- "{source}\n{target}\n\n" # example: "User: {source}\nAgent: {target}\n\n"
 
72
  )
73
- model_input_format: str = "{instruction}{demos}{source}\n"
74
 
75
  @staticmethod
76
  def _retrieve_field_and_assert_not_none(instance, field_name) -> str:
@@ -95,9 +97,20 @@ class SystemFormat(Format):
95
  instruction = self._retrieve_field_and_assert_not_none(
96
  instance=instance, field_name="instruction"
97
  )
98
- # pop "instruction" from instance
 
 
 
 
 
 
 
 
 
99
  if "instruction" in instance:
100
  instance.pop("instruction")
 
 
101
 
102
  demo_instances = []
103
  if self.demos_field is not None and self.demos_field in instance:
@@ -111,13 +124,19 @@ class SystemFormat(Format):
111
 
112
  demos_string = ""
113
  for demo_instance in demo_instances:
114
- demo_str = self.demo_format.format(**demo_instance)
 
 
 
 
115
  demos_string += demo_str
116
 
117
  output = self.model_input_format.format(
 
118
  instruction=instruction,
119
  demos=demos_string,
120
  source=source,
 
121
  )
122
  instance["source"] = output
123
  return instance
 
17
  r"""Generates the whole input to the model, from constant strings that are given as args, and from values found in specified fields of the instance.
18
 
19
  SystemFormat expects the input instance to contain:
20
+ 1. A field named "system_prompt" whose value is a string (potentially empty) that delivers a task-independent opening text.
21
+ 2. A field named "source" whose value is a string verbalizing the original values in the instance (as read
22
  from the source dataset), in the context of the underlying task.
23
+ 3. A field named "instruction" that contains a (non-None) string.
24
+ 4. A field named with the value in arg 'demos_field', containing a list of dicts, each dict with fields "source"
25
  and "target", representing a single demo.
26
+ 5. A field named "target_prefix" that contains a string used to prefix the target in each demo and to end the whole generated prompt.
27
 
28
  SystemFormat formats the above fields into a single string to be inputted to the model. This string overwrites
29
  field "source" of the instance. Formatting is driven by two args: 'demo_format' and 'model_input_format'.
30
+ SystemFormat also pops fields "system_prompt", "instruction", "target_prefix", and the field containing the demos out from the input instance.
31
 
32
  Args:
33
  demos_field (str): the name of the field that contains the demos, being a list of dicts, each with "source" and "target" keys
 
69
  """
70
 
71
  demos_field: str = "demos"
72
+ demo_format: str = "{source}\n{target_prefix}{target}\n\n" # example: "User: {source}\nAgent: {target}\n\n"
73
+ model_input_format: str = (
74
+ "{system_prompt}{instruction}{demos}{source}\n{target_prefix}"
75
  )
 
76
 
77
  @staticmethod
78
  def _retrieve_field_and_assert_not_none(instance, field_name) -> str:
 
97
  instruction = self._retrieve_field_and_assert_not_none(
98
  instance=instance, field_name="instruction"
99
  )
100
+ target_prefix = self._retrieve_field_and_assert_not_none(
101
+ instance=instance, field_name="target_prefix"
102
+ )
103
+ system_prompt = self._retrieve_field_and_assert_not_none(
104
+ instance=instance, field_name="system_prompt"
105
+ )
106
+
107
+ # pop "system_prompt", "instruction", and "target_prefix" from instance
108
+ if "target_prefix" in instance:
109
+ instance.pop("target_prefix")
110
  if "instruction" in instance:
111
  instance.pop("instruction")
112
+ if "system_prompt" in instance:
113
+ instance.pop("system_prompt")
114
 
115
  demo_instances = []
116
  if self.demos_field is not None and self.demos_field in instance:
 
124
 
125
  demos_string = ""
126
  for demo_instance in demo_instances:
127
+ demo_str = self.demo_format.format(
128
+ target_prefix=target_prefix,
129
+ source=demo_instance["source"],
130
+ target=demo_instance["target"],
131
+ )
132
  demos_string += demo_str
133
 
134
  output = self.model_input_format.format(
135
+ system_prompt=system_prompt,
136
  instruction=instruction,
137
  demos=demos_string,
138
  source=source,
139
+ target_prefix=target_prefix,
140
  )
141
  instance["source"] = output
142
  return instance