|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
import evaluate |
|
|
|
|
|
# Load the HF `evaluate` perplexity metric once at import time; the scoring
# model itself ("gpt2") is chosen later at compute() time.
perplexity = evaluate.load("perplexity", module_type="metric")




# Serverless Hugging Face Inference API client bound to the
# zephyr-7b-beta chat model; used to generate the text being scored.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
def compute_perplexity(message):
    """Generate a chat response to *message* and return its GPT-2 perplexity.

    Parameters
    ----------
    message : str
        User prompt forwarded to the zephyr-7b-beta chat model.

    Returns
    -------
    str
        Human-readable string containing the perplexity of the generated
        response, or an explanatory message if the model returned no text.
    """
    messages = [{"role": "user", "content": message}]

    # Non-streaming request: the full completion arrives in one object.
    response = client.chat_completion(
        messages,
        max_tokens=512,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )

    # BUG FIX: with stream=False the generated text lives on
    # `choices[0].message.content`; `.delta` exists only on streaming
    # chunks, so the original `.delta.content` failed at runtime.
    generated_text = response.choices[0].message.content

    # Guard: perplexity.compute raises on an empty prediction list entry.
    if not generated_text:
        return "The model returned an empty response; perplexity is undefined."

    perplexity_results = perplexity.compute(
        model_id='gpt2',
        add_start_token=False,
        predictions=[generated_text],
    )
    perplexity_value = perplexity_results['perplexity']

    return f"Perplexity of the response: {perplexity_value}"
|
|
|
|
|
# Minimal Gradio UI: a single text input mapped to a single text output,
# wired to compute_perplexity above.
demo = gr.Interface(

    fn=compute_perplexity,

    inputs="text",

    outputs="text",

    title="Compute Perplexity",

    description="Enter a text to compute its perplexity based on the gpt2 model."

)



# Start the local Gradio server only when executed as a script
# (not when imported as a module, e.g. by a Spaces runner).
if __name__ == "__main__":

    demo.launch()
|
|