tomascufaro committed on
Commit
b8ae7f5
1 Parent(s): ad82fc4

no categories support

Browse files
Files changed (1) hide show
  1. app.py +60 -30
app.py CHANGED
@@ -19,6 +19,7 @@ from langchain.prompts import ChatPromptTemplate
19
  import gradio as gr
20
  from collections import defaultdict
21
 
 
22
  # Schema
23
  schema = {
24
  "properties": {
@@ -31,7 +32,7 @@ schema = {
31
  # Input
32
  prompt = ChatPromptTemplate.from_messages(
33
  [
34
- ("system", "You are an expert marketing researcher specialized in the finance industry"),
35
  ("human", """{prompt_input}.
36
  Here you have the categories splitted by coma: {categories}.
37
  and Here you have the keywords splitted by coma: {keywords}."""),
@@ -39,10 +40,20 @@ prompt = ChatPromptTemplate.from_messages(
39
  ]
40
  )
41
 
42
- llm = ChatOpenAI(temperature=0, openai_api_key=os.environ['OpenAI_APIKEY'], model="gpt-3.5-turbo")
 
 
 
 
 
 
 
 
 
43
  chain = create_extraction_chain(schema, llm, prompt, verbose=1)
 
44
 
45
- def run_chain(input_prompt, keywords_file, categories_file, batch_size=50):
46
  results = []
47
  batch_size = batch_size
48
  index = 0
@@ -50,31 +61,51 @@ def run_chain(input_prompt, keywords_file, categories_file, batch_size=50):
50
  keywords = pd.read_csv(keywords_file.name)
51
  except:
52
  keywords = pd.read_excel(keywords_file.name)
53
- try:
54
- categories = pd.read_csv(categories_file.name)
55
- except:
56
- categories = pd.read_excel(categories_file.name)
57
- keywords = list(keywords[keywords.columns[0]].values)
58
- categories = list(categories[categories.columns[0]].values)
59
- while index < len(keywords):
60
  try:
61
- batch = keywords[index:index+batch_size]
62
  except:
63
- batch = keywords[index:]
64
- try:
65
- result = chain.run({'prompt_input':input_prompt, 'categories':','.join(categories), 'keywords':','.join(batch)})
66
- except Exception as E:
67
- print('this batch did not worked from {} to {}'.format(index, index + batch_size))
68
- print(E)
69
- result = []
70
- results += result
71
- index += batch_size
72
- results_to_csv(results)
73
- #print((index, batch_size, len(keywords)))
74
- return results, 'themes_results.csv'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def results_to_csv(results):
77
- super_dict = defaultdict(list)
78
  for d in results:
79
  for k, v in d.items(): # d.items() in Python 3+
80
  super_dict[k].append(v)
@@ -85,20 +116,19 @@ with gr.Blocks() as demo:
85
  prompt_input = gr.Text("""I need your help to analyze and categorize the provided list of keywords
86
  into the appropriate categories.
87
  The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.
88
- Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.""")
 
89
  gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
90
  keywords_file = gr.File(file_types=['csv', 'xlsx'], label='keywords')
91
  gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the keywords in one column. Should have a header")
92
  categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')
93
- with gr.Accordion("Open for More!"):
94
- gr.Markdown("Look at me...")
95
-
96
- btn = gr.Button(value="run")
97
  txt_3 = gr.Textbox(value="", label="Output")
98
  output_file = gr.File(label="Output File",
99
  file_count="single",
100
  file_types=["", ".", ".csv",".xls",".xlsx"])
101
 
102
  btn.click(run_chain, inputs=[prompt_input, keywords_file, categories_file], outputs=[txt_3, output_file])
103
-
104
  demo.launch()
 
19
  import gradio as gr
20
  from collections import defaultdict
21
 
22
+ """Core Modules"""
23
  # Schema
24
  schema = {
25
  "properties": {
 
32
  # Input
33
  prompt = ChatPromptTemplate.from_messages(
34
  [
35
+ ("system", "You are an expert marketing researcher"),
36
  ("human", """{prompt_input}.
37
  Here you have the categories splitted by coma: {categories}.
38
  and Here you have the keywords splitted by coma: {keywords}."""),
 
40
  ]
41
  )
42
 
43
+ prompt_no_cat = ChatPromptTemplate.from_messages(
44
+ [
45
+ ("system", "You are an expert marketing researcher"),
46
+ ("human", """{prompt_input}.
47
+ and Here you have the keywords splitted by coma: {keywords}."""),
48
+ ("human", "Tip: Make sure to answer in the correct format and DO NOT leave keywords without category and DO NOT skip keywords. Please categorize all the keywords that I give you, each keyword must have just one and only one category."),
49
+ ]
50
+ )
51
+
52
+ llm = ChatOpenAI(temperature=0, openai_api_key=APIkeys.OpenAI_APIKEY, model="gpt-3.5-turbo")
53
  chain = create_extraction_chain(schema, llm, prompt, verbose=1)
54
+ chain_no_cat = create_extraction_chain(schema, llm, prompt_no_cat, verbose=1)
55
 
56
def _read_first_column(uploaded_file):
    """Return the first column of an uploaded CSV/Excel file as a list of str.

    The file is parsed as CSV first and as Excel if that fails, mirroring the
    original fallback order. Empty cells are dropped and every value is
    converted to ``str`` so the result can be passed to ``','.join`` safely.
    """
    # NOTE(review): the line opening this try was hidden by the diff hunk gap
    # in the reviewed source; it is reconstructed from the visible `except`.
    try:
        frame = pd.read_csv(uploaded_file.name)
    except Exception:
        frame = pd.read_excel(uploaded_file.name)
    return frame[frame.columns[0]].dropna().astype(str).tolist()


def run_chain(input_prompt, keywords_file, categories_file=None, batch_size=50):
    """Categorize the uploaded keywords with the extraction chain.

    Args:
        input_prompt: Instruction text forwarded to the chain as
            ``prompt_input``.
        keywords_file: Uploaded file object; its ``.name`` path is read and
            the first column is used as the keyword list.
        categories_file: Optional uploaded file whose first column lists the
            allowed categories. When ``None``, ``chain_no_cat`` is used and
            all keywords are sent in a single request.
        batch_size: Number of keywords per request when categories are
            given. Ignored when ``categories_file`` is ``None``.

    Returns:
        A tuple ``(results, 'themes_results.csv')`` where ``results`` is the
        concatenation of every successful batch result.

    Raises:
        ValueError: If categories are given and ``batch_size`` is below 1
            (the original loop would never terminate in that case).
    """
    keywords = _read_first_column(keywords_file)

    if categories_file is not None:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        active_chain = chain
        # The category list is the same for every batch, so join it once.
        extra_inputs = {'categories': ','.join(_read_first_column(categories_file))}
    else:
        active_chain = chain_no_cat
        extra_inputs = {}
        # Without categories everything goes out as one request; max() keeps
        # the range step valid when the keyword list is empty.
        batch_size = max(len(keywords), 1)

    results = []
    for index in range(0, len(keywords), batch_size):
        batch = keywords[index:index + batch_size]
        chain_inputs = {'prompt_input': input_prompt, 'keywords': ','.join(batch)}
        chain_inputs.update(extra_inputs)
        try:
            result = active_chain.run(chain_inputs)
        except Exception as E:
            # Best effort: report the failed batch and continue with the rest.
            print('this batch did not worked from {} to {}'.format(index, index + batch_size))
            print(E)
            result = []
        results += result

    results_to_csv(results)
    return results, 'themes_results.csv'
106
 
107
  def results_to_csv(results):
108
+ super_dict = collections.defaultdict(list)
109
  for d in results:
110
  for k, v in d.items(): # d.items() in Python 3+
111
  super_dict[k].append(v)
 
116
  prompt_input = gr.Text("""I need your help to analyze and categorize the provided list of keywords
117
  into the appropriate categories.
118
  The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.
119
+ Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.
120
+ If the categories are not given """)
121
  gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
122
  keywords_file = gr.File(file_types=['csv', 'xlsx'], label='keywords')
123
  gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the keywords in one column. Should have a header")
124
  categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')
125
+ btn = gr.Button(value="Run with categories")
126
+ btn2 = gr.Button(value="Run without categories")
 
 
127
  txt_3 = gr.Textbox(value="", label="Output")
128
  output_file = gr.File(label="Output File",
129
  file_count="single",
130
  file_types=["", ".", ".csv",".xls",".xlsx"])
131
 
132
  btn.click(run_chain, inputs=[prompt_input, keywords_file, categories_file], outputs=[txt_3, output_file])
133
+ btn2.click(run_chain, inputs=[prompt_input, keywords_file], outputs=[txt_3, output_file])
134
  demo.launch()