File size: 4,415 Bytes
070c576
 
 
 
 
 
6449689
f9c03d9
4425add
92d0a3c
4d5131c
6449689
410e03d
6449689
410e03d
 
 
 
4011ea8
 
410e03d
4011ea8
 
410e03d
 
 
 
 
 
 
 
 
6449689
410e03d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e949ec2
 
 
 
410e03d
 
 
 
 
 
 
 
 
 
4011ea8
410e03d
 
 
 
 
 
6449689
 
 
 
 
 
 
 
 
 
 
 
 
070c576
69d3bd4
cefc7a3
6449689
070c576
 
 
 
 
 
6449689
070c576
6449689
070c576
6449689
 
4011ea8
 
 
4d5131c
19fca94
4011ea8
070c576
6449689
cefc7a3
070c576
4011ea8
 
070c576
 
853deb7
 
070c576
853deb7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import gradio as gr
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os
import pandas as pd
import numpy as np
from groq import Groq
import anthropic
from users_management import update_json, users

#users = ['maksG', 'AlmaA', 'YchK']

def ask_llm(query, input, client_index):
    messages = [
        {
            "role": "system",
            "content": f"You are a helpful assistant. Only show your final response to the **User Query**! Do not provide any explanations or details: \n# User Query:\n{query}."
        },
        {
            "role": "user",
            "content": input,
        }
    ]
    
    systemC = messages[0]["content"]
    messageC = [{
        "role": "user",
        "content": [{
            "type": "text",
            "text": input
        }]
    }]

    if "Mistral" in client_index:
        client = MistralClient(api_key=os.environ['MISTRAL_API_KEY'])
        model_map = {
            "Mistral Tiny": "mistral-tiny",
            "Mistral Small": "mistral-small-latest",
            "Mistral Medium": "mistral-medium",
        }
        chat_completion = client.chat(messages=messages, model=model_map[client_index])

    elif "Claude" in client_index:
        client = anthropic.Anthropic(api_key=os.environ['CLAUDE_API_KEY'])
        model_map = {
            "Claude Sonnet": "claude-3-sonnet-20240229",
            "Claude Opus": "claude-3-opus-20240229",
        }
        response = client.messages.create(
            model=model_map[client_index],
            max_tokens=350,
            temperature=0,
            system=systemC,
            messages=messageC
        )
        return response.content[0].text

    elif client_index == "Groq (mixtral)":
        client = Groq(api_key=os.environ["GROQ_API_KEY"])
        chat_completion = client.chat.completions.create(
            messages=messages,
            model='mixtral-8x7b-32768',
        )

    else:
        raise ValueError("Unsupported client index provided")

    # Return the response, handling the structure specific to Groq and Mistral clients.
    return chat_completion.choices[0].message.content if client_index != "Claude" else chat_completion




def filter_df(df, column_name, keywords):
    if len(keywords)>0:
        if column_name in df.columns:
            contains_keyword = lambda x: any(keyword.lower() in (x.lower() if type(x)==str else '') for keyword in keywords)
            filtered_df = df[df[column_name].apply(contains_keyword)]
        else:
            contains_keyword = lambda row: any(keyword.lower() in (str(cell).lower() if isinstance(cell, str) else '') for keyword in keywords for cell in row)
            filtered_df = df[df.apply(contains_keyword, axis=1)]
    else:
        filtered_df = df
    return filtered_df

def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client, user):
    new_prompts, new_keywords = update_json(user, prompt, keywords)
    print(f'xlsxfile = {excel_file}')
    df = pd.read_excel(excel_file)
    df[dest_col] = ""
    try:
      file_name = url.split("/")[-2] + ".xlsx"
    except:
      file_name = excel_file
    print(f"Keywords: {keywords}")

    filtred_df = filter_df(df, search_col, keywords)

    for index, row in filtred_df.iterrows():
        concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols)
        print('test')
        if not concatenated_content == "\n\n".join(f"{column_name}: nan" for column_name in source_cols):
            print('c bon')
            llm_answer = ask_llm(prompt[0], concatenated_content, client, user)
            print(f"QUERY:\n{prompt[0]}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}")
            df.at[index, dest_col] = llm_answer

    df.to_excel(file_name, index=False)
    return file_name, df.head(5), new_prompts, new_keywords


def get_columns(file,progress=gr.Progress()):
    if file is not None:
        df = pd.read_excel(file)
        columns = list(df.columns)
        return gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns + [""]), gr.update(choices=columns + ['[ALL]']), df.head(5)
    else:
        return gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), pd.DataFrame()