import os
import json

import gradio as gr
import requests

# Hugging Face Inference API token, supplied via environment (never hard-coded).
hf_token = os.getenv("HF_TOKEN")

_API_BASE = "https://api-inference.huggingface.co/models/"


def _query_model(model_id, text):
    """Send *text* to a Hugging Face Inference API model and return the generated text.

    Shared implementation for query_llama / query_mistral (previously two
    near-identical copies). Returns a human-readable error string rather than
    raising, so the Gradio UI can display failures inline.

    Args:
        model_id: HF model repo id, e.g. "meta-llama/Meta-Llama-3-8B-Instruct".
        text: The prompt to send as the "inputs" payload.

    Returns:
        The model's generated text, or an "Error: ..." string on failure.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}  # Token from environment variable
    payload = {"inputs": text}
    try:
        # timeout= prevents the UI from hanging forever on a stalled request.
        response = requests.post(
            _API_BASE + model_id, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()  # Raises an exception for 4XX/5XX errors
        # Print formatted JSON response for debugging.
        data = response.json()
        print(f"{model_id} Response Data:", json.dumps(data, indent=2))
        # The API returns a list containing a dictionary, e.g.
        # [{"generated_text": "..."}]; anything else is an unexpected shape.
        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
            # .get() never raises, so no separate KeyError handler is needed
            # (the original had an unreachable `except KeyError:` branch).
            return data[0].get(
                "generated_text", "Error: 'generated_text' key not found in response"
            )
        return "Error: Invalid response format"
    except requests.RequestException as e:
        print("HTTP Error:", e)
        return "Error: HTTP request failed"


def query_llama(text):
    """Query Meta-Llama-3-8B-Instruct; return generated text or an error string."""
    return _query_model("meta-llama/Meta-Llama-3-8B-Instruct", text)


def query_mistral(text):
    """Query Mistral-7B-Instruct-v0.2; return generated text or an error string."""
    return _query_model("mistralai/Mistral-7B-Instruct-v0.2", text)


def chat_with_models(text):
    """Send the same prompt to both models; return (llama_response, mistral_response)."""
    llama_response = query_llama(text)
    mistral_response = query_mistral(text)
    return llama_response, mistral_response


# Gradio UI: one prompt box feeding both models, side-by-side output boxes.
with gr.Blocks() as demo:
    gr.Markdown("🚀 Mistral 7B vs LLama3 8B đŸĻ™")
    gr.Markdown(
        "🕹ī¸ Compare the performance and responses of two powerful models, "
        "Mistral 7B and LLama3 8B instruct. Type your questions or prompts "
        "below and see how each model responds to the same input 👾"
    )
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter your prompt here:",
            placeholder="Type something...",
            lines=2,
        )
    submit_button = gr.Button("Submit")
    output_llama = gr.Textbox(
        label="Llama 3 8B 👾", placeholder="", lines=10, interactive=False
    )
    output_mistral = gr.Textbox(
        label="Mistral 7B 🌠", placeholder="", lines=10, interactive=False
    )
    submit_button.click(
        fn=chat_with_models,
        inputs=input_text,
        outputs=[output_llama, output_mistral],
    )

if __name__ == "__main__":
    demo.launch()