EE21 committed on
Commit
845351c
1 Parent(s): 2313689

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -22
app.py CHANGED
@@ -7,35 +7,14 @@ from extractive_summarization import summarize_with_textrank, summarize_with_lsa
7
  from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
8
  from keyword_extraction import extract_keywords
9
  from keyphrase_extraction import extract_sentences_with_obligations
10
- from transformers import AutoModelForTokenClassification, AutoTokenizer
11
- import torch
12
  #from blanc import BlancHelp
13
 
14
-
15
  # Load in ToS
16
  dataset = load_dataset("EE21/ToS-Summaries")
17
 
18
 
19
- model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
20
- tokenizer = AutoTokenizer.from_pretrained(model_name)
21
- model = AutoModelForTokenClassification.from_pretrained(model_name)
22
-
23
- def extract_organization_names(text):
24
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
25
- outputs = model(**inputs)
26
-
27
- predictions = torch.argmax(outputs.logits, dim=2)
28
- entities = [tokenizer.convert_ids_to_tokens(inputs.input_ids[0][idx]) for idx, pred in enumerate(predictions[0]) if model.config.id2label[pred.item()] == 'B-ORG']
29
- return " ".join(entities)
30
-
31
- # Apply this function to your dataset
32
- tos_titles = [extract_organization_names(doc['plain_text']) for doc in dataset['train']]
33
-
34
-
35
  # Extract titles or identifiers for the ToS
36
- #tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
37
-
38
-
39
 
40
 
41
  # Set page to wide mode
 
7
  from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
8
  from keyword_extraction import extract_keywords
9
  from keyphrase_extraction import extract_sentences_with_obligations
 
 
10
  #from blanc import BlancHelp
11
 
 
12
  # Load in ToS
13
  dataset = load_dataset("EE21/ToS-Summaries")
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Extract titles or identifiers for the ToS
17
+ tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
 
 
18
 
19
 
20
  # Set page to wide mode