Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,623 Bytes
2ded88b 2860a50 0df179d 2ded88b 0df179d 2ded88b 9bed19f 2ded88b 0df179d 2860a50 0df179d 9bed19f 2860a50 0df179d 2ded88b 0df179d 2ded88b 9bed19f 0df179d 9bed19f bbc46f9 9bed19f bbc46f9 9bed19f 0df179d bbc46f9 0df179d 9bed19f 0df179d 2ded88b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# Module preamble: imports, version diagnostics, warning filter and device
# selection. Everything here runs at import time, in this order.
import sys
import warnings
# Printed before the third-party imports below, so it appears even if one of
# those imports fails.
print("Warning: This application requires specific library versions. Please ensure you have the correct versions installed.")
# `spaces` supplies the `GPU` decorator applied to check_relevance further down.
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import numpy as np
# Report the NumPy and PyTorch versions that were actually imported.
print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")
# Suppress CUDA initialization warning
warnings.filterwarnings("ignore", category=UserWarning, message="Can't initialize NVML")
# Check for GPU availability
# Module-level device used when the model is first loaded: CUDA when PyTorch
# reports it as available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Model loading and setup
model_name = "jhu-clsp/FollowIR-7B"

# Prompt wrapped around every request; {query} and {text} are filled in by
# check_relevance via str.format.
template = """<s> [INST] You are an expert Google searcher, whose job is to determine if the following document is relevant to the query (true/false). Answer using only one word, one of those two choices.
Query: {query}
Document: {text}
Relevant (only output one word, either "true" or "false"): [/INST] """

try:
    # Load the weights in bfloat16, then move them to the module-level device.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
    )
    model = model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
except ValueError as err:
    # A ValueError while loading is reported and ends the process.
    print(f"Error loading model or tokenizer: {err}")
    print("Please ensure you have the correct versions of transformers and sentencepiece installed.")
    sys.exit(1)

# Reuse the end-of-sequence token for padding and pad on the left.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Vocabulary ids of the two answer words whose logits are compared when scoring.
_vocab = tokenizer.get_vocab()
token_false_id, token_true_id = _vocab["false"], _vocab["true"]
@spaces.GPU
def check_relevance(query, instruction, passage):
    """Return the model's probability that ``passage`` is relevant.

    The query and instruction are joined with a single space, inserted into
    the module-level prompt ``template`` together with the passage, and run
    through the model once. The logits predicted for the "true" and "false"
    tokens at the final position are normalised against each other, and the
    resulting probability of "true" is returned.

    Args:
        query: Search query text.
        instruction: Additional relevance criteria appended to the query.
        passage: Document text to judge.

    Returns:
        The probability of "true" as a string with four decimal places.
    """
    # Only `model` is rebound here; the other module globals are read-only.
    global model

    # Always resolve the device. Assigning it only inside an
    # `if torch.cuda.is_available()` branch left the name unbound on CPU-only
    # hosts, so moving the inputs below raised UnboundLocalError.
    run_device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(run_device)

    full_query = f"{query} {instruction}"
    prompt = template.format(query=full_query, text=passage)

    encoded = tokenizer(
        [prompt],
        padding=True,
        truncation=True,
        return_tensors="pt",
        pad_to_multiple_of=None,
    )
    # Put every input tensor on the same device as the model.
    inputs = {name: tensor.to(run_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        # Logits at the last position: the prediction for the token that
        # would follow the prompt.
        next_token_logits = model(**inputs).logits[:, -1, :]

    # Keep only the "false"/"true" logits and normalise over that pair.
    false_logits = next_token_logits[:, token_false_id]
    true_logits = next_token_logits[:, token_true_id]
    pair_logits = torch.stack([false_logits, true_logits], dim=1)
    pair_log_probs = torch.nn.functional.log_softmax(pair_logits, dim=1)

    # Column 1 is "true"; exponentiate the log-probability to get P(true).
    score = pair_log_probs[:, 1].exp().item()
    return f"{score:.4f}"
# Example inputs
# Both examples share the same query and instruction and differ only in the
# passage: the first describes a co-directed film, the second a solely
# directed one.
_CAMERON_QUERY = "What movies were directed by James Cameron?"
_CAMERON_INSTRUCTION = (
    "A relevant document would describe any movie that was directed by "
    "James Cameron but not any that are co-directed."
)

# Each row is [query, instruction, passage], matching the order of the UI inputs.
examples = [
    [
        _CAMERON_QUERY,
        _CAMERON_INSTRUCTION,
        "Avatar: The Way of Water is a 2022 American epic science fiction film co-produced and co-directed by James Cameron and Rick Jaffe.",
    ],
    [
        _CAMERON_QUERY,
        _CAMERON_INSTRUCTION,
        "Avatar: The Way of Water is a 2022 American epic science fiction film co-produced and directed by James Cameron. Rick Jaffe helped write the script.",
    ],
]
# Gradio Interface
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation; confirm the placement of gr.Examples against the deployed UI.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Relevance Using Instructions")
    gr.Markdown("This app uses the FollowIR-7B model to determine the relevance of a passage to a given query and instruction.")

    with gr.Row():
        # First column: the three text inputs and the submit button.
        with gr.Column():
            query_input = gr.Textbox(
                label="Query",
                placeholder="Enter your search query here",
            )
            instruction_input = gr.Textbox(
                label="Instruction",
                placeholder="Enter additional instructions or criteria",
            )
            passage_input = gr.Textbox(
                label="Passage",
                placeholder="Enter the passage to check for relevance",
                lines=5,
            )
            submit_button = gr.Button("Check Relevance")

        # Second column: the score returned by check_relevance.
        with gr.Column():
            output = gr.Textbox(label="Relevance Probability")

    # Example rows feed the same three inputs and call the same function as
    # the button; their outputs are cached.
    gr.Examples(
        examples=examples,
        fn=check_relevance,
        inputs=[query_input, instruction_input, passage_input],
        outputs=output,
        cache_examples=True,
    )

    # Clicking the button scores the current inputs and shows the result.
    submit_button.click(
        fn=check_relevance,
        inputs=[query_input, instruction_input, passage_input],
        outputs=[output],
    )
if __name__ == "__main__":
    # Refuse to start under NumPy 2.x: report the incompatibility and exit
    # with a non-zero status instead of launching the UI.
    if np.__version__.startswith("2."):
        print("Error: This application is not compatible with NumPy 2.x. Please downgrade to NumPy < 2.0.0.")
        sys.exit(1)

    # Start the Gradio app. The stray trailing "|" that followed this call
    # was a syntax error and has been removed.
    demo.launch()