Spaces:

zhenyundeng
/

fact-checking-api

Sleeping

App Files Files Community

zhenyundeng committed on Sep 7, 2024

Commit

0db7c43

1 Parent(s): 0c5727b

update

Browse files

Files changed (1) hide show

app.py +13 -5

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 # from averitec.models.AveritecModule import Wikipediaretriever, Googleretriever, veracity_prediction, justification_generation
 import uvicorn
 app = FastAPI()
@@ -35,6 +36,7 @@ wiki_wiki = wikipediaapi.Wikipedia('AVeriTeC (zd302@cam.ac.uk)', 'en')
 import nltk
 nltk.download('punkt')
 from nltk import pos_tag, word_tokenize, sent_tokenize
 import spacy
@@ -74,15 +76,19 @@ LABEL = [
 ]
 # Veracity
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 veracity_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 bert_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4, problem_type="single_label_classification")
 veracity_checkpoint_path = os.getcwd() + "/averitec/pretrained_models/bert_veracity.ckpt"
 veracity_model = SequenceClassificationModule.load_from_checkpoint(veracity_checkpoint_path,tokenizer=veracity_tokenizer, model=bert_model).to(device)
 # Justification
 justification_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large', add_prefix_space=True)
 bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
-best_checkpoint = os.getcwd()+ '/averitec/pretrained_models/bart_justifications_verdict-epoch=13-val_loss=2.03-val_meteor=0.28.ckpt'
 justification_model = JustificationGenerationModule.load_from_checkpoint(best_checkpoint, tokenizer=justification_tokenizer, model=bart_model).to(device)
 # ---------------------------------------------------------------------------
@@ -259,7 +265,7 @@ class SequenceClassificationDataLoader(pl.LightningDataModule):
                 + bool_explanation
         )
 def veracity_prediction(claim, evidence):
     dataLoader = SequenceClassificationDataLoader(
         tokenizer=veracity_tokenizer,
@@ -277,8 +283,8 @@ def veracity_prediction(claim, evidence):
         return pred_label
     tokenized_strings, attention_mask = dataLoader.tokenize_strings(evidence_strings)
-    example_support = torch.argmax(
-        veracity_model(tokenized_strings.to(device), attention_mask=attention_mask.to(device)).logits, axis=1)
     has_unanswerable = False
     has_true = False
@@ -335,11 +341,12 @@ def extract_claim_str(claim, evidence, verdict_label):
     return claim_str
 def justification_generation(claim, evidence, verdict_label):
     """Generate a justification text for the verdict on a claim.

     The model input is built by ``extract_claim_str`` from the claim, the
     evidence and the verdict label, trimmed of surrounding whitespace, and
     passed to ``justification_model.generate`` on the module-level ``device``.

     Args:
         claim: The claim being checked (forwarded to ``extract_claim_str``).
         evidence: The evidence for the claim (forwarded to ``extract_claim_str``).
         verdict_label: The predicted verdict (forwarded to ``extract_claim_str``).

     Returns:
         The generated justification with surrounding whitespace removed.
     """
     # str.strip() returns a new string; the result must be kept, otherwise
     # the untrimmed text is what reaches the model.
     claim_str = extract_claim_str(claim, evidence, verdict_label).strip()
     pred_justification = justification_model.generate(claim_str, device=device)
     return pred_justification.strip()
@@ -362,6 +369,7 @@ def log_on_azure(file, logs, azure_share_client):
     file_client.upload_file(logs)
 @app.post("/predict/")
 def fact_checking(item: Item):
     # claim = item['claim']

 from pydantic import BaseModel
 # from averitec.models.AveritecModule import Wikipediaretriever, Googleretriever, veracity_prediction, justification_generation
 import uvicorn
+# import spaces
 app = FastAPI()
 import nltk
 nltk.download('punkt')
+nltk.download('punkt_tab')
 from nltk import pos_tag, word_tokenize, sent_tokenize
 import spacy
 ]
 # Veracity
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 veracity_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 bert_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4, problem_type="single_label_classification")
 veracity_checkpoint_path = os.getcwd() + "/averitec/pretrained_models/bert_veracity.ckpt"
+# veracity_model = SequenceClassificationModule.load_from_checkpoint(veracity_checkpoint_path,tokenizer=veracity_tokenizer, model=bert_model).to('cuda')
 veracity_model = SequenceClassificationModule.load_from_checkpoint(veracity_checkpoint_path,tokenizer=veracity_tokenizer, model=bert_model).to(device)
 # Justification
 justification_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large', add_prefix_space=True)
 bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
+best_checkpoint = os.getcwd() + '/averitec/pretrained_models/bart_justifications_verdict-epoch=13-val_loss=2.03-val_meteor=0.28.ckpt'
+# justification_model = JustificationGenerationModule.load_from_checkpoint(best_checkpoint, tokenizer=justification_tokenizer, model=bart_model).to('cuda')
 justification_model = JustificationGenerationModule.load_from_checkpoint(best_checkpoint, tokenizer=justification_tokenizer, model=bart_model).to(device)
 # ---------------------------------------------------------------------------
                 + bool_explanation
         )
+# @spaces.GPU
 def veracity_prediction(claim, evidence):
     dataLoader = SequenceClassificationDataLoader(
         tokenizer=veracity_tokenizer,
         return pred_label
     tokenized_strings, attention_mask = dataLoader.tokenize_strings(evidence_strings)
+    # example_support = torch.argmax(veracity_model(tokenized_strings.to('cuda'), attention_mask=attention_mask.to('cuda')).logits, axis=1)
+    example_support = torch.argmax(veracity_model(tokenized_strings.to(device), attention_mask=attention_mask.to(device)).logits, axis=1)
     has_unanswerable = False
     has_true = False
     return claim_str
+# @spaces.GPU
 def justification_generation(claim, evidence, verdict_label):
     #
     claim_str = extract_claim_str(claim, evidence, verdict_label)
     claim_str.strip()
+    # pred_justification = justification_model.generate(claim_str, device='cuda')
     pred_justification = justification_model.generate(claim_str, device=device)
     return pred_justification.strip()
     file_client.upload_file(logs)
+# @spaces.GPU
 @app.post("/predict/")
 def fact_checking(item: Item):
     # claim = item['claim']