joaomorossini committed on
Commit
23fee25
1 Parent(s): 0a08480

refactoring: create separate file for the prompt template

Browse files
Files changed (2) hide show
  1. app.py +5 -39
  2. prompt_template.py +36 -0
app.py CHANGED
@@ -9,6 +9,7 @@ from pandas import DataFrame as PandasDataFrame
9
  from llm import MessageChatCompletion
10
  from customization import css, js
11
  from examples import example_1, example_2, example_3, example_4
 
12
 
13
  load_dotenv()
14
 
@@ -33,46 +34,10 @@ def build_context(row):
33
 
34
  def click_button(model, api_key, abstract):
35
  labels = df['Subsector'].tolist()
36
- contexts = [build_context(row) for _, row in df.iterrows()]
37
  language_model = MessageChatCompletion(model=model, api_key=api_key)
38
- system_message = (f"""
39
- You are a system designed to classify patent abstracts into one or more subsectors based on their content.
40
- Each subsector is defined by a unique set of characteristics:
41
- Name: The name of the subsector.
42
- Definition: A brief description of the subsector.
43
- Keywords: Important words associated with the subsector.
44
- Does include: Elements typically found within the subsector.
45
- Does not include: Elements typically not found within the subsector.
46
- Consider 'nan' values as 'not available' or 'not applicable'.
47
- When classifying an abstract, provide the following:
48
- ## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
49
- ## 2. Reasoning:
50
- ### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
51
- ### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
52
- ### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
53
- ### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
54
- ## 3. Non-selected Subsectors:
55
- - If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
56
- ## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
57
- ## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
58
- <context>
59
- {contexts}
60
- </context>
61
- """)
62
-
63
-
64
- user_message = f"""
65
- Classify this patent abstract into one or more labels, then format your response as markdown:
66
-
67
- <labels>
68
- {labels}
69
- </labels>
70
-
71
- <abstract>
72
- {abstract}
73
- </abstract>
74
- """
75
-
76
  language_model.new_system_message(content=system_message)
77
  language_model.new_user_message(content=user_message)
78
  language_model.send_message()
@@ -94,6 +59,7 @@ def click_button(model, api_key, abstract):
94
 
95
  return match_score_dict, response_reasoning, logs_df
96
 
 
97
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
98
  selected = df.iloc[[evt.index[0]]].iloc[0]
99
  name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
 
9
  from llm import MessageChatCompletion
10
  from customization import css, js
11
  from examples import example_1, example_2, example_3, example_4
12
+ from prompt_template import system_message_template, user_message_template
13
 
14
  load_dotenv()
15
 
 
34
 
35
  def click_button(model, api_key, abstract):
36
  labels = df['Subsector'].tolist()
37
+ prompt_context = [build_context(row) for _, row in df.iterrows()]
38
  language_model = MessageChatCompletion(model=model, api_key=api_key)
39
+ system_message = system_message_template.format(prompt_context=prompt_context)
40
+ user_message = user_message_template.format(labels=labels, abstract=abstract)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  language_model.new_system_message(content=system_message)
42
  language_model.new_user_message(content=user_message)
43
  language_model.send_message()
 
59
 
60
  return match_score_dict, response_reasoning, logs_df
61
 
62
+
63
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
64
  selected = df.iloc[[evt.index[0]]].iloc[0]
65
  name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
prompt_template.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_message_template = """
2
+ You are a system designed to classify patent abstracts into one or more subsectors based on their content.
3
+ Each subsector is defined by a unique set of characteristics:
4
+ Name: The name of the subsector.
5
+ Definition: A brief description of the subsector.
6
+ Keywords: Important words associated with the subsector.
7
+ Does include: Elements typically found within the subsector.
8
+ Does not include: Elements typically not found within the subsector.
9
+ Consider 'nan' values as 'not available' or 'not applicable'.
10
+ When classifying an abstract, provide the following:
11
+ ## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
12
+ ## 2. Reasoning:
13
+ ### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
14
+ ### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
15
+ ### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
16
+ ### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
17
+ ## 3. Non-selected Subsectors:
18
+ - If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
19
+ ## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
20
+ ## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
21
+ <context>
22
+ {prompt_context}
23
+ </context>
24
+ """
25
+
26
+ user_message_template = """
27
+ Classify this patent abstract into one or more labels, then format your response as markdown:
28
+
29
+ <labels>
30
+ {labels}
31
+ </labels>
32
+
33
+ <abstract>
34
+ {abstract}
35
+ </abstract>
36
+ """