import gradio as gr from mistralai.client import MistralClient from mistralai.models.chat_completion import ChatMessage import os import pandas as pd import numpy as np from groq import Groq import requests from bs4 import BeautifulSoup def extract_statuses(url): # Send a GET request to the webpage response = requests.get(url) # Parse the webpage content soup = BeautifulSoup(response.content, 'html.parser') # Find all links in the webpage links = soup.find_all('a') # Identify and download the Excel file for link in links: href = link.get('href') if href and (href.endswith('.xls') or href.endswith('.xlsx')): excel_url = href if href.startswith('http') else url + href excel_response = requests.get(excel_url) file_name = 'guide_status.xlsx' #excel_url.split('/')[-1] # Save the file with open(file_name, 'wb') as f: f.write(excel_response.content) # Read the Excel file df = pd.read_excel(file_name) # Check if 'TDoc Status' column exists and extract unique statuses if 'TDoc Status' in df.columns: unique_statuses = df['TDoc Status'].unique().tolist() print(f'Downloaded {file_name} and extracted statuses: {unique_statuses}') if 'withdrawn' in unique_statuses: unique_statuses.remove('withdrawn') return unique_statuses else: print(f"'TDoc Status' column not found in {file_name}") return [] def ask_llm(query, input, client_index): messages = [ { "role": "system", "content": f"You are a helpful assistant. Only show your final response to the **User Query**! Do not provide any explanations or details: \n# User Query:\n{query}." }, { "role": "user", "content": f"{input}", } ] if client_index == 0: client = Groq(api_key=os.environ["GROQ_API_KEY"]) chat_completion = client.chat.completions.create( messages=messages, model='mixtral-8x7b-32768', ) else: client = MistralClient(api_key=os.environ['MISTRAL_API_KEY']) chat_completion = client.chat( messages=messages, model='mistral-small-latest', ) return chat_completion.choices[0].message.content def filter_df(df, column_name, keywords): if len(keywords)>0: if column_name in df.columns: contains_keyword = lambda x: any(keyword.lower() in (x.lower() if type(x)==str else '') for keyword in keywords) filtered_df = df[df[column_name].apply(contains_keyword)] else: contains_keyword = lambda row: any(keyword.lower() in (str(cell).lower() if isinstance(cell, str) else '') for keyword in keywords for cell in row) filtered_df = df[df.apply(contains_keyword, axis=1)] else: filtered_df = df return filtered_df def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client): print(f'xlsxfile = {excel_file}') df = pd.read_excel(excel_file) df[dest_col] = "" try: file_name = url.split("/")[-2] + ".xlsx" except: file_name = excel_file print(f"Keywords: {keywords}") filtred_df = filter_df(df, search_col, keywords) for index, row in filtred_df.iterrows(): concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols) llm_answer = ask_llm(prompt, concatenated_content, client) print(f"QUERY:\n{prompt}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}") df.at[index, dest_col] = llm_answer df.to_excel(file_name, index=False) return file_name, df.head(5) def get_columns(file): if file is not None: df = pd.read_excel(file) columns = list(df.columns) return gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns + [""]), gr.update(choices=columns + ['[ALL]']), df.head(5) else: return gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), pd.DataFrame()