"""Gradio helpers that run an LLM prompt over the rows of an Excel sheet.

``chat_with_mistral`` is the entry point: it filters the sheet, sends the
selected source columns of each row to the chosen LLM provider through
``ask_llm`` and writes the answers into a destination column.
"""

import os
import time
import zipfile

import anthropic
import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from openai import *  # supplies OpenAI, BadRequestError and RateLimitError

from code_df_custom import load_excel
from users_management import update_json, users

# users = ['maksG', 'AlmaA', 'YchK']

# UI label -> provider model identifier, one table per provider.
_MISTRAL_MODELS = {
    "Mistral Tiny": "mistral-tiny",
    "Mistral Small": "mistral-small-latest",
    "Mistral Medium": "mistral-medium",
}
_CLAUDE_MODELS = {
    "Claude Sonnet": "claude-3-sonnet-20240229",
    "Claude Opus": "claude-3-opus-20240229",
}
_PERPLEXITY_MODELS = {
    "Perplexity Llama3 70b": "llama-3-70b-instruct",
    "Perplexity Llama3 8b": "llama-3-8b-instruct",
    "Perplexity Llama3 Sonar Small": "llama-3-sonar-small-32k-chat",
    "Perplexity Llama3 Sonar Large": "llama-3-sonar-large-32k-chat",
}
_GROQ_MODELS = {
    "Groq Mixtral": "mixtral-8x7b-32768",
    "Groq Llama3 70b": "llama3-70b-8192",
    "Groq Llama3 8b": "llama3-8b-8192",
}
# Model used for the single Groq retry after rotating the API key.
_GROQ_FALLBACK_MODEL = 'llama3-8b-8192'


def _system_prompt(query):
    """Return the system instruction that embeds the user's query."""
    return (
        "You are a helpful assistant. Only show your final response to the "
        "**User Query**! Do not provide any explanations or details: "
        f"\n# User Query:\n{query}."
    )


def _build_messages(query, user_input):
    """Return the system + user chat messages for one request."""
    return [
        {"role": "system", "content": _system_prompt(query)},
        {"role": "user", "content": user_input},
    ]


def _next_groq_key(current, all_keys):
    """Return the key name following ``current`` in ``all_keys``, cyclically.

    An unknown ``current`` restarts the rotation at the first key.
    """
    try:
        position = all_keys.index(current)
    except ValueError:
        return all_keys[0]
    return all_keys[(position + 1) % len(all_keys)]


def _ask_groq(messages, client_index, keys):
    """Query Groq, rotating the API key and retrying once on any failure.

    ``keys[0]`` is updated in place when the key is rotated. The retry uses
    ``_GROQ_FALLBACK_MODEL`` regardless of the model originally requested;
    an error raised by the retry propagates to the caller.
    """
    try:
        client = Groq(api_key=os.getenv(keys[0]))
        completion = client.chat.completions.create(
            messages=messages,
            model=_GROQ_MODELS[client_index],
        )
        return completion.choices[0].message.content
    except Exception:
        # Deliberately broad: any failure is treated as a reason to try the
        # next key.
        print("Change key")
        keys[0] = _next_groq_key(keys[0], keys[1])
        client = Groq(api_key=os.getenv(keys[0]))
        completion = client.chat.completions.create(
            messages=messages,
            model=_GROQ_FALLBACK_MODEL,
        )
        return completion.choices[0].message.content


def _ask_in_chunks(client, model_name, query, user_input):
    """Split ``user_input`` into token chunks, query each, join the answers.

    Used after the provider rejected the full request with a
    ``BadRequestError``. ``client`` must expose ``chat.completions.create``.
    """
    model_id = "meta-llama/Meta-Llama-3-70B-Instruct"
    access_token = os.getenv("HUGGINGFACE_SPLITFILES_API_KEY")
    # NOTE(review): neither `AutoTokenizer` nor `max_token` is imported or
    # defined anywhere in this file, so this path raises NameError until a
    # tokenizer import (presumably Hugging Face transformers) and a chunk
    # size are provided. TODO confirm the intended values.
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        padding_side="left",
        token=access_token,
    )
    token_ids = tokenizer.encode(user_input)
    # Step through the whole token list so the final, shorter chunk is sent
    # as well.
    chunks = [
        tokenizer.decode(token_ids[start:start + max_token])
        for start in range(0, len(token_ids), max_token)
    ]
    print(len(chunks))
    answers = []
    for chunk in chunks:
        completion = client.chat.completions.create(
            model=model_name,
            messages=_build_messages(query, chunk),
        )
        answers.append(completion.choices[0].message.content)
    return " ".join(answers)


def ask_llm(query, user_input, client_index, user, keys):
    """Send ``user_input`` to the LLM selected by ``client_index``.

    Args:
        query: Instruction embedded in the system prompt.
        user_input: Content sent as the user message.
        client_index: UI label of the model; the provider is chosen by
            substring ("Mistral", "Claude", "GPT 4o", "Perplexity", "Groq").
        user: Mapping read as ``user['api_keys'][provider]``; the value is
            the name of the environment variable holding the API key.
        keys: ``[current_groq_key_env_name, [all_groq_key_env_names]]``;
            ``keys[0]`` is rotated in place when a Groq call fails.

    Returns:
        A ``(answer_text, keys)`` tuple on every path, which is the shape the
        caller in this module unpacks. On an unexpected error the text is
        ``"unhandled error"``.
    """
    messages = _build_messages(query, user_input)
    client = None
    model_name = None
    try:
        if "Mistral" in client_index:
            client = MistralClient(api_key=os.environ[user['api_keys']['mistral']])
            model_name = _MISTRAL_MODELS[client_index]
            chat_completion = client.chat(messages=messages, model=model_name)
            return chat_completion.choices[0].message.content, keys

        if "Claude" in client_index:
            client = anthropic.Anthropic(api_key=os.environ[user['api_keys']['claude']])
            model_name = _CLAUDE_MODELS[client_index]
            # This call takes the system prompt as a separate argument.
            response = client.messages.create(
                model=model_name,
                max_tokens=350,
                temperature=0,
                system=_system_prompt(query),
                messages=[{
                    "role": "user",
                    "content": [{"type": "text", "text": user_input}],
                }],
            )
            return response.content[0].text, keys

        if "GPT 4o" in client_index:
            client = OpenAI(api_key=os.environ["OPENAI_YCHK"])
            model_name = "gpt-4o"
            # Send the system message too; otherwise the query is never
            # transmitted to the model.
            response = client.chat.completions.create(
                model=model_name,
                messages=messages,
            )
            return response.choices[0].message.content, keys

        if "Perplexity" in client_index:
            client = OpenAI(
                api_key=os.environ["PERPLEXITY_ALMAA"],
                base_url="https://api.perplexity.ai",
            )
            model_name = _PERPLEXITY_MODELS[client_index]
            response = client.chat.completions.create(
                model=model_name,
                messages=messages,
            )
            response_content = str(response.choices[0].message.content)
            print(response_content)
            return response_content, keys

        if "Groq" in client_index:
            return _ask_groq(messages, client_index, keys), keys

        raise ValueError("Unsupported client index provided")
    except BadRequestError:
        # The full request was rejected: retry in chunks with the same
        # client and model that just failed.
        return _ask_in_chunks(client, model_name, query, user_input), keys
    except RateLimitError:
        # if model_user in keys: swap those keys and retry immediately,
        # otherwise wait before retrying.
        # NOTE: retries without an upper bound while the provider keeps
        # rate limiting.
        time.sleep(60)
        return ask_llm(query, user_input, client_index, user, keys)
    except Exception as e:
        print(e)
        return "unhandled error", keys


def filter_df(df, column_name, keywords):
    """Return the rows of ``df`` that mention at least one keyword.

    Matching is a case-insensitive substring test on string cells only;
    non-string cells are treated as empty text. When ``column_name`` is a
    column of ``df`` only that column is searched, otherwise every cell of
    the row is. With no keywords ``df`` is returned unchanged.
    """
    if not keywords:
        return df

    lowered_keywords = [keyword.lower() for keyword in keywords]

    def mentions_keyword(value):
        text = value.lower() if isinstance(value, str) else ''
        return any(keyword in text for keyword in lowered_keywords)

    if column_name in df.columns:
        return df[df[column_name].apply(mentions_keyword)]

    row_mask = df.apply(
        lambda row: any(mentions_keyword(cell) for cell in row),
        axis=1,
    )
    return df[row_mask]


def _output_file_name(excel_file, url, dest_col):
    """Derive the output workbook name from the input file or the URL."""
    if excel_file:
        return excel_file.split('.xlsx')[0] + "_with_" + dest_col.replace(' ', '_') + ".xlsx"
    # str.endswith needs a tuple to test several suffixes.
    if url and url.endswith(('Docs/', 'Docs')):
        return url.split("/Docs")[0].split("/")[-1] + ".xlsx"
    return "meeting_recap_grid.xlsx"


def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client, user):
    """Fill ``dest_col`` of an Excel sheet with LLM answers, row by row.

    Rows are selected with ``filter_df(df, search_col, keywords)``. For each
    selected row the ``source_cols`` values are joined into one text and sent
    to ``ask_llm`` together with ``prompt[0]``; rows whose source columns are
    all ``nan`` are skipped.

    Returns:
        ``(file_name, df.head(5), new_prompts, new_keywords, new_user,
        zip_file_path, "checkpointfile.xlsx")``, where ``file_name`` is the
        written workbook and ``zip_file_path`` an archive holding the config
        file returned by ``update_json``.
    """
    # API Keys for Groq :
    KEYS = ['GROQ_API_KEY1', 'GROQ_API_KEY2', 'GROQ_API_KEY3']
    # [name of the key currently in use, all key names]; ask_llm rotates it.
    groq_keys = [KEYS[0], KEYS]

    new_prompts, new_keywords, new_user, conf_file_path = update_json(user, prompt, keywords)
    print(f'xlsxfile = {excel_file}')
    df = pd.read_excel(excel_file)
    df[dest_col] = ""
    file_name = _output_file_name(excel_file, url, dest_col)

    print(f"Keywords: {keywords}")
    filtered_df = filter_df(df, search_col, keywords)

    # Text produced by a row whose source columns are all empty.
    empty_content = "\n\n".join(f"{column_name}: nan" for column_name in source_cols)

    cpt = 1
    checkpoint = 50
    for index, row in filtered_df.iterrows():
        content = "\n\n".join(
            f"{column_name}: {str(row[column_name])}" for column_name in source_cols
        )
        if content != empty_content:
            try:
                llm_answer, groq_keys = ask_llm(prompt[0], content, client, user, groq_keys)
            except Exception:
                # Best effort: one failing row must not abort the whole run.
                print("Catched a error : Global exception for ask_llm")
                llm_answer = "unhandled global error"

            print(f"{cpt}/{len(filtered_df)}\nQUERY:\n{prompt[0]}\nCONTENT:\n{content[:200]}...\n\nANSWER:\n{llm_answer}")
            df.at[index, dest_col] = llm_answer

            try:
                # NOTE(review): with `+= 1` the checkpoint file is rewritten
                # on every processed row once the threshold is reached;
                # confirm whether a fixed interval was intended.
                if cpt == checkpoint:
                    df.to_excel("checkpointfile.xlsx", index=False)
                    checkpoint += 1
            except Exception as e:
                print(f"no checkpoint : {e}")

        cpt += 1
        # progress((index+1)/len(df),desc=f'Request {index+1}/{len(df)}!')

    df.to_excel(file_name, index=False)

    zip_file_path = 'config_file.zip'
    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
        zipf.write(conf_file_path, os.path.basename(conf_file_path))

    return file_name, df.head(5), new_prompts, new_keywords, new_user, zip_file_path, "checkpointfile.xlsx"


def get_columns(file, progress=gr.Progress()):
    """Load ``file`` and return Gradio updates listing its columns.

    Returns an 8-tuple: five ``gr.update`` objects carrying the column
    choices (the fourth also offers ``""`` and the fifth ``'[ALL]'``), a
    five-row preview, the file name returned by ``load_excel`` and the full
    DataFrame. When ``file`` is ``None`` the choices are empty and the
    frames/file name are blank.
    """
    if file is None:
        cleared = tuple(gr.update(choices=[]) for _ in range(5))
        return cleared + (pd.DataFrame(), '', pd.DataFrame())

    # df = pd.read_excel(file)
    filename, df = load_excel(file)
    columns = list(df.columns)
    return (
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns + [""]),
        gr.update(choices=columns + ['[ALL]']),
        df.head(5),
        filename,
        df,
    )