AKSelectionPredictor

Running on Zero

App Files Files Community

yuntian-deng committed on Apr 26, 2024

Commit

1d82bda

verified ·

1 Parent(s): 07b94f3

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -2

app.py CHANGED Viewed

@@ -1,13 +1,50 @@
 import gradio as gr
 import json
 # Load your validation set
 #with open('validation_data.json', 'r') as file:
 #    validation_data = json.load(file)
 def predict(title, authors, abstract):
     # Your model prediction logic here
-    score = 0.5 #model_inference(title, authors, abstract)  # Replace this with your actual model inference
     # Calculate precision for scores >= the predicted score
     #selected = [d for d in validation_data if d['score'] >= score]
@@ -15,7 +52,7 @@ def predict(title, authors, abstract):
     #precision = true_positives / len(selected) if selected else 0
     precision = 0.2
-    result = f"For papers with a score greater than or equal to {score:.2f}, approximately {precision * 100:.2f}% are selected by AK."
     return score, result

 import gradio as gr
 import json
+import re
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Checkpoint identifier handed to both Hugging Face loaders below.
model_name = 'yuntian-deng/ak-paper-selection-deberta'
# Token budget per input; passed to the tokenizer as its truncation limit.
max_length = 512

tokenizer = AutoTokenizer.from_pretrained(model_name)
# nn.Module.eval() returns the module itself, so loading the weights and
# switching to inference mode can be written as one expression.
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()

# Move the weights to the GPU once at import time when CUDA is available.
if torch.cuda.is_available():
    model = model.cuda()
def normalize_spaces(text):
    """Collapse each run of whitespace in *text* into a single space.

    Leading and trailing whitespace is stripped from the result.
    """
    return re.sub(r'\s+', ' ', text).strip()


# Load your validation set
#with open('validation_data.json', 'r') as file:
#    validation_data = json.load(file)


def fill_template(title, authors, abstract):
    """Render a paper's metadata as the three-line text given to the model.

    Args:
        title: Paper title. Whitespace runs (newlines included) are collapsed
            to single spaces.
        authors: Comma-separated author names. Each name is stripped and the
            names are re-joined with a comma followed by one space.
        abstract: Paper abstract, normalized the same way as the title.

    Returns:
        A string with three lines: "Title: ...", "Authors: ..." and
        "Abstract: ...", in that order.
    """
    # Use the `title` argument directly: the earlier `x['title']` referred to
    # a name that does not exist in this scope and raised NameError.
    # No separate newline replacement is needed because `\s+` in
    # normalize_spaces already matches newlines.
    title = normalize_spaces(title)
    authors = ', '.join(author.strip() for author in authors.split(','))
    abstract = normalize_spaces(abstract)
    return f"Title: {title}\nAuthors: {authors}\nAbstract: {abstract}"
@torch.no_grad()
def model_inference(title, authors, abstract):
    """Run the classifier on one paper and return a single probability.

    The three fields are formatted with fill_template, tokenized as a batch
    of one (truncated to max_length tokens), and passed through the model.
    The logits are softmaxed over the last dimension and flattened; the
    value at index 1 is returned as a Python float.

    NOTE(review): index 1 is presumably the "selected" class of a two-label
    head -- confirm against the checkpoint's label mapping.
    """
    prompt = fill_template(title, authors, abstract)
    print(prompt)  # echo the exact model input to stdout
    encoded = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=max_length)
    if torch.cuda.is_available():
        # Put every input tensor on the GPU alongside the model weights.
        encoded = {name: tensor.cuda() for name, tensor in encoded.items()}
    class_probs = model(**encoded).logits.softmax(dim=-1).view(-1)
    return class_probs[1].item()
 def predict(title, authors, abstract):
     # Your model prediction logic here
+    score = model_inference(title, authors, abstract)
     # Calculate precision for scores >= the predicted score
     #selected = [d for d in validation_data if d['score'] >= score]
     #precision = true_positives / len(selected) if selected else 0
     precision = 0.2
+    result = f"Your score: {score:.2f}.\nFor papers with score >= {score:.2f}, {precision * 100:.2f}% are selected by AK."
     return score, result