"""Gradio app that asks an OpenAI chat model to assign a category to keywords.

Keywords (and, optionally, the allowed categories) are read from the first
column of an uploaded CSV/Excel file, sent to a LangChain extraction chain,
and the extracted ``{"keyword", "category"}`` records are written to
``themes_results.csv``.
"""
import collections
import os

import gradio as gr
import pandas as pd  # for data manipulation (pip install pandas)
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# File the extracted records are written to (relative to the working dir).
OUTPUT_CSV = 'themes_results.csv'

# Schema of one extracted record.
schema = {
    "properties": {
        "keyword": {"type": "string"},
        "category": {"type": "string"},
    },
    "required": ["keyword", "category"],
}

# Prompt texts. These are sent to the model verbatim, so their wording
# (including spelling) is deliberately left untouched.
_SYSTEM_MESSAGE = "You are an expert marketing researcher"
_HUMAN_WITH_CATEGORIES = (
    "{prompt_input}. Here you have the categories splitted by coma: "
    "{categories}. and Here you have the keywords splitted by coma: "
    "{keywords}."
)
_HUMAN_WITHOUT_CATEGORIES = (
    "{prompt_input}. and Here you have the keywords splitted by coma: "
    "{keywords}."
)
_TIP_MESSAGE = (
    "Tip: Make sure to answer in the correct format and DO NOT leave "
    "keywords without category and DO NOT skip keywords. Please categorize "
    "all the keywords that I give you, each keyword must have just one and "
    "only one category."
)

# Prompt used when the caller supplies the list of allowed categories.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_MESSAGE),
        ("human", _HUMAN_WITH_CATEGORIES),
        ("human", _TIP_MESSAGE),
    ]
)

# Prompt used when no categories are supplied.
prompt_no_cat = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_MESSAGE),
        ("human", _HUMAN_WITHOUT_CATEGORIES),
        ("human", _TIP_MESSAGE),
    ]
)

llm = ChatOpenAI(
    temperature=0,
    openai_api_key=os.getenv("OpenAI_APIKEY"),
    model="gpt-3.5-turbo",
)
chain = create_extraction_chain(schema, llm, prompt, verbose=True)
chain_no_cat = create_extraction_chain(schema, llm, prompt_no_cat, verbose=True)


def _read_first_column(uploaded_file):
    """Return the non-empty cells of the file's first column as strings.

    ``uploaded_file`` may be an object exposing the path as ``.name`` or a
    plain path string. The file is parsed as CSV first and, if that fails,
    as an Excel workbook.
    """
    path = getattr(uploaded_file, "name", uploaded_file)
    try:
        frame = pd.read_csv(path)
    except Exception:  # not parseable as CSV: fall back to Excel
        frame = pd.read_excel(path)
    # Blank cells are dropped and everything is cast to str so that the
    # later ','.join(...) cannot fail on NaN or numeric values.
    return frame.iloc[:, 0].dropna().astype(str).tolist()


def run_chain(input_prompt, keywords_file, categories_file=None, batch_size=50):
    """Categorize the uploaded keywords and save the result as CSV.

    Args:
        input_prompt: Instruction text inserted into the prompt template.
        keywords_file: Uploaded CSV/Excel file; keywords are taken from its
            first column.
        categories_file: Optional uploaded CSV/Excel file whose first column
            lists the allowed categories. When ``None`` the prompt without
            categories is used and all keywords are sent in a single request.
        batch_size: Number of keywords per request when categories are given.

    Returns:
        A ``(results, csv_path)`` tuple: the list of extracted records and
        the path of the CSV file they were written to.

    Raises:
        ValueError: If no keywords file is given, or if ``batch_size`` is
            smaller than 1 while categories are supplied.
    """
    if keywords_file is None:
        raise ValueError("A keywords file is required")
    keywords = _read_first_column(keywords_file)

    if categories_file is not None:
        if batch_size < 1:
            # A non-positive step would never advance through the keywords.
            raise ValueError("batch_size must be at least 1")
        categories = _read_first_column(categories_file)
        extractor = chain
        fixed_inputs = {
            'prompt_input': input_prompt,
            'categories': ','.join(categories),
        }
        step = batch_size
    else:
        extractor = chain_no_cat
        fixed_inputs = {'prompt_input': input_prompt}
        # Without categories every keyword goes out in one request.
        step = max(len(keywords), 1)

    results = []
    for start in range(0, len(keywords), step):
        batch = keywords[start:start + step]
        try:
            result = extractor.run({**fixed_inputs, 'keywords': ','.join(batch)})
        except Exception as exc:
            # Best effort: report the failed batch and carry on with the next.
            print('this batch did not worked from {} to {}'.format(start, start + step))
            print(exc)
            result = []
        results += result
        # Save after every batch so partial progress is kept on disk.
        results_to_csv(results)

    if not results:
        # Make sure the returned path exists even when nothing was extracted.
        results_to_csv(results)
    return results, OUTPUT_CSV


def results_to_csv(results):
    """Write the extracted records to ``themes_results.csv``, one per row.

    Columns are the union of the record keys; a record lacking a key gets an
    empty cell in that column.
    """
    pd.DataFrame(list(results)).to_csv(OUTPUT_CSV, index=False)


# NOTE(review): the last sentence of this default prompt looks unfinished
# ("If the categories are not given "); kept as-is because the text is sent
# to the model.
_DEFAULT_PROMPT = (
    "I need your help to analyze and categorize the provided list of "
    "keywords into the appropriate categories. The goal is to understand "
    "information demand on search engines within this industry. Each "
    "keyword represents a search and it should have a relation with the "
    "category. Extract each keyword and assign the best category among the "
    "given categories. Return every keyword with the relative category in "
    "pairs. If the categories are not given "
)

with gr.Blocks() as demo:
    prompt_input = gr.Text(_DEFAULT_PROMPT)
    gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
    keywords_file = gr.File(file_types=['.csv', '.xlsx'], label='keywords')
    gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the categories in one column. Should have a header")
    categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')
    btn = gr.Button(value="Run with categories")
    btn2 = gr.Button(value="Run without categories")
    txt_3 = gr.Textbox(value="", label="Output")
    output_file = gr.File(
        label="Output File",
        file_count="single",
        file_types=["", ".", ".csv", ".xls", ".xlsx"],
    )
    btn.click(
        run_chain,
        inputs=[prompt_input, keywords_file, categories_file],
        outputs=[txt_3, output_file],
    )
    # No categories input here, so run_chain uses its categories_file=None default.
    btn2.click(
        run_chain,
        inputs=[prompt_input, keywords_file],
        outputs=[txt_3, output_file],
    )

demo.launch()