zhenyundeng committed on
Commit
ddd6481
·
1 Parent(s): 6bf7515
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -12,34 +12,67 @@ app = FastAPI()
12
 
13
  # ---------------------------------------------------------------------------------------------------------------------
14
  import gradio as gr
15
- import os
16
- import torch
17
- import json
18
  import tqdm
19
- from time import sleep
20
  import numpy as np
21
- import requests
22
- from rank_bm25 import BM25Okapi
23
- from bs4 import BeautifulSoup
24
  from datetime import datetime
 
 
 
 
 
 
 
 
25
 
26
  from transformers import BartTokenizer, BartForConditionalGeneration
27
  from transformers import BloomTokenizerFast, BloomForCausalLM, BertTokenizer, BertForSequenceClassification
28
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
29
- import pytorch_lightning as pl
30
 
31
- from urllib.parse import urlparse
 
 
32
  from html2lines import url2lines
33
  from googleapiclient.discovery import build
34
  from averitec.models.DualEncoderModule import DualEncoderModule
35
  from averitec.models.SequenceClassificationModule import SequenceClassificationModule
36
  from averitec.models.JustificationGenerationModule import JustificationGenerationModule
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  # ---------------------------------------------------------------------------------------------------------------------
 
 
39
  import wikipediaapi
40
  wiki_wiki = wikipediaapi.Wikipedia('AVeriTeC (zd302@cam.ac.uk)', 'en')
41
 
42
  import nltk
 
 
43
  nltk.download('punkt')
44
  nltk.download('punkt_tab')
45
  from nltk import pos_tag, word_tokenize, sent_tokenize
@@ -125,30 +158,10 @@ tokenized_corpus1, prompt_corpus1 = generate_step2_reference_corpus(reference_fi
125
  prompt_bm25 = BM25Okapi(tokenized_corpus1)
126
 
127
  # ---------------------------------------------------------------------------------------------------------------------
128
- # ---------------------------------------------------------------------------
129
- # load .env
130
- from utils import create_user_id
131
- user_id = create_user_id()
132
-
133
- from azure.storage.fileshare import ShareServiceClient
134
- try:
135
- from dotenv import load_dotenv
136
- load_dotenv()
137
- except Exception as e:
138
- pass
139
-
140
- account_url = os.environ["AZURE_ACCOUNT_URL"]
141
- credential = {
142
- "account_key": os.environ['AZURE_ACCOUNT_KEY'],
143
- "account_name": os.environ['AZURE_ACCOUNT_NAME']
144
- }
145
-
146
- file_share_name = "averitec"
147
- azure_service = ShareServiceClient(account_url=account_url, credential=credential)
148
- azure_share_client = azure_service.get_share_client(file_share_name)
149
 
150
  # ---------- Setting ----------
151
  # ---------- Load Veracity and Justification prediction model ----------
 
152
  LABEL = [
153
  "Supported",
154
  "Refuted",
 
12
 
13
  # ---------------------------------------------------------------------------------------------------------------------
14
  import gradio as gr
 
 
 
15
  import tqdm
16
+ import torch
17
  import numpy as np
18
+ from time import sleep
 
 
19
  from datetime import datetime
20
+ import threading
21
+ import gc
22
+ import os
23
+ import json
24
+ import pytorch_lightning as pl
25
+ from urllib.parse import urlparse
26
+ from accelerate import Accelerator
27
+ import spaces
28
 
29
  from transformers import BartTokenizer, BartForConditionalGeneration
30
  from transformers import BloomTokenizerFast, BloomForCausalLM, BertTokenizer, BertForSequenceClassification
31
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
 
32
 
33
+ from rank_bm25 import BM25Okapi
34
+ # import bm25s
35
+ # import Stemmer # optional: for stemming
36
  from html2lines import url2lines
37
  from googleapiclient.discovery import build
38
  from averitec.models.DualEncoderModule import DualEncoderModule
39
  from averitec.models.SequenceClassificationModule import SequenceClassificationModule
40
  from averitec.models.JustificationGenerationModule import JustificationGenerationModule
41
+ from averitec.data.sample_claims import CLAIMS_Type
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # load .env
45
+ from utils import create_user_id
46
+ user_id = create_user_id()
47
+
48
+ from azure.storage.fileshare import ShareServiceClient
49
+ try:
50
+ from dotenv import load_dotenv
51
+ load_dotenv()
52
+ except Exception as e:
53
+ pass
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # os.environ["TOKENIZERS_PARALLELISM"] = "false"
57
+ account_url = os.environ["AZURE_ACCOUNT_URL"]
58
+ credential = {
59
+ "account_key": os.environ['AZURE_ACCOUNT_KEY'],
60
+ "account_name": os.environ['AZURE_ACCOUNT_NAME']
61
+ }
62
+
63
+ file_share_name = "averitec"
64
+ azure_service = ShareServiceClient(account_url=account_url, credential=credential)
65
+ azure_share_client = azure_service.get_share_client(file_share_name)
66
 
67
  # ---------------------------------------------------------------------------------------------------------------------
68
+ import requests
69
+ from bs4 import BeautifulSoup
70
  import wikipediaapi
71
  wiki_wiki = wikipediaapi.Wikipedia('AVeriTeC (zd302@cam.ac.uk)', 'en')
72
 
73
  import nltk
74
+ nltk.download('averaged_perceptron_tagger_eng')
75
+ nltk.download('averaged_perceptron_tagger')
76
  nltk.download('punkt')
77
  nltk.download('punkt_tab')
78
  from nltk import pos_tag, word_tokenize, sent_tokenize
 
158
  prompt_bm25 = BM25Okapi(tokenized_corpus1)
159
 
160
  # ---------------------------------------------------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  # ---------- Setting ----------
163
  # ---------- Load Veracity and Justification prediction model ----------
164
+ print("Loading models ...")
165
  LABEL = [
166
  "Supported",
167
  "Refuted",