Spaces:
Sleeping
Sleeping
File size: 1,357 Bytes
34f203c 36c2546 34f203c 36c2546 34f203c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from bs4 import BeautifulSoup
import requests
def getText(url: str, max_chars: int = 20000, timeout: float = 10.0):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to scrape.
        max_chars: Largest amount of scraped text (in characters) that is
            accepted; longer pages are rejected.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` element joined with single spaces, or
        ``None`` when the page cannot be fetched, does not answer with
        HTTP 200, or yields more than ``max_chars`` characters.
    """
    try:
        # Without a timeout, requests waits forever on an unresponsive host.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors, invalid URLs and timeouts take the same
        # "report and return None" path as a non-200 response.
        print("[INFO] couldn't access website data, try again")
        return None

    if response.status_code != 200:
        print("[INFO] couldn't access website data, try again")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    paragraphs = soup.find_all(['p'])
    scraped_text = ' '.join(element.get_text() for element in paragraphs)

    if len(scraped_text) > max_chars:
        print("[ERROR] page too large to perform qna")
        return None
    return scraped_text
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Load the 'google/flan-t5-large' checkpoint and its tokenizer once, at module
# import time, so getAnswer() can reuse the same objects on every call.
# NOTE(review): from_pretrained presumably fetches the weights on first use if
# they are not cached locally, which makes importing this module slow - confirm.
model = T5ForConditionalGeneration.from_pretrained('google/flan-t5-large')
tokenizer = T5Tokenizer.from_pretrained('google/flan-t5-large')
def getAnswer(url: str, question: str):
    """Answer ``question`` using the paragraph text scraped from ``url``.

    Args:
        url: Page whose text is used as the context for the question.
        question: Natural-language question to ask about that page.

    Returns:
        The decoded model output as a string, or an error message string
        when no context could be scraped from ``url``.
    """
    context = getText(url)
    if context is None:
        # getText() signals failure with None; formatting it into the prompt
        # would make the model answer against the literal text "None".
        return "[ERROR] couldn't retrieve text from the given url"

    prompt = f"context : {context}, question : {question}"
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids

    # Beam search is used (do_sample is not enabled), so the sampling-only
    # options temperature / top_k / top_p had no effect and are omitted.
    outputs = model.generate(
        input_ids,
        min_length=10,
        max_new_tokens=600,
        length_penalty=1,
        num_beams=3,
        no_repeat_ngram_size=3,
        repetition_penalty=2.1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)