vinayakdev commited on
Commit
c1b9d7d
1 Parent(s): f892f8d

Happy Hugging Face!

Browse files
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. gene +0 -0
  3. generator.py +115 -0
  4. main.py +8 -0
  5. requirements.txt +5 -0
  6. stream_lib.py +17 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
gene ADDED
File without changes
generator.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import transformers
3
+ from transformers import (
4
+ # Text2TextGenerationPipeline,
5
+ TFAutoModelForSeq2SeqLM as alwm,
6
+ # TokenClassificationPipeline,
7
+ # AutoModelForTokenClassification,
8
+ TFAutoModelForQuestionAnswering as amqa,
9
+ AutoTokenizer as att,
10
+ # BertTokenizer,
11
+ # AlbertTokenizer,
12
+ # BertForQuestionAnswering,
13
+ # AlbertForQuestionAnswering,
14
+ # T5Config,
15
+ # T5ForConditionalGeneration,
16
+ T5TokenizerFast,
17
+ PreTrainedTokenizer,
18
+ PreTrainedModel,
19
+ # ElectraTokenizer,
20
+ # ElectraForQuestionAnswering
21
+ )
22
+ import torch
23
+ import tensorflow
24
+ import string
25
+ import numpy as np
26
+ from transformers import pipeline
27
+ from transformers.pipelines import AggregationStrategy
28
+ import pickle
29
+
30
# End-to-end question generation: T5-small fine-tuned for e2e-qg.
# Earlier experiments (mrm8488 checkpoint, pickled local models) kept for reference:
# hftokenizer = pickle.load(open('models/hftokenizer.sav', 'rb'))
# hfmodel = pickle.load(open('models/hfmodel.sav', 'rb'))
hfmodel = alwm.from_pretrained("valhalla/t5-small-e2e-qg")
hftokenizer = T5TokenizerFast.from_pretrained("t5-small")
38
def run_model(input_string, **generator_args):
    """Generate questions for *input_string* with the e2e-qg T5 model.

    Returns a list with one element per generated sequence; each element is
    the decoded text split on the model's "<sep>" question delimiter.

    Bug fix: the original overwrote ``generator_args`` with a hard-coded
    dict, silently discarding every keyword argument the caller passed.
    Defaults are now merged first so caller overrides win.
    """
    generator_args = {
        "max_length": 256,
        "num_beams": 4,
        "length_penalty": 1.5,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
        **generator_args,  # caller-supplied overrides take precedence
    }
    # e2e-qg task prefix; " </s>" is the explicit T5 end-of-sequence marker
    # the checkpoint was trained with.
    input_string = "generate questions: " + input_string + " </s>"
    input_ids = hftokenizer.encode(input_string, return_tensors="pt")
    res = hfmodel.generate(input_ids, **generator_args)
    output = hftokenizer.batch_decode(res, skip_special_tokens=True)
    output = [item.split("<sep>") for item in output]
    return output
53
+
54
# Extractive question answering: ELECTRA-base fine-tuned on SQuAD2
# (tokenizer + TF model). Pickled local copies from earlier runs:
# al_model = pickle.load(open('models/al_model.sav', 'rb'))
# al_tokenizer = pickle.load(open('models/al_tokenizer.sav', 'rb'))
al_tokenizer = att.from_pretrained("deepset/electra-base-squad2")
al_model = amqa.from_pretrained("deepset/electra-base-squad2")
59
def QA(question, context):
    """Answer *question* from *context* with the ELECTRA SQuAD2 pipeline.

    Returns a formatted string containing the question, the cap-worded
    answer, and the model's confidence score.

    Fixes: the payload dict was named ``format``, shadowing the builtin;
    a long tract of dead commented-out code (manual logit argmax answer
    extraction) has been removed.
    """
    # NOTE(review): the pipeline is rebuilt on every call; hoisting it to
    # module level would avoid repeated construction, but is left as-is to
    # keep module import side effects unchanged.
    nlp = pipeline("question-answering", model=al_model, tokenizer=al_tokenizer)
    payload = {
        'question': question,
        'context': context
    }
    res = nlp(payload)
    output = f"{question}\n{string.capwords(res['answer'])}\tscore : [{res['score']}] \n"
    return output
85
+
86
def gen_question(inputs):
    """Generate questions for *inputs* (thin wrapper around run_model)."""
    return run_model(inputs)

# Example query format for answer-aware generation:
# gen_question(f"answer: {string_query} context: The first C program said {string_query} ")
94
+
95
+
96
def read_file(filepath_name):
    """Read the file at *filepath_name* and return its contents with
    newlines flattened to single spaces.

    Bug fix: the original opened the undefined name ``text`` instead of
    the ``filepath_name`` parameter, raising NameError on every call.
    """
    with open(filepath_name, "r") as infile:
        contents = infile.read()
    return contents.replace("\n", " ")
101
+
102
def create_string_for_generator(context):
    """Generate questions for *context* and split the first decoded string
    into individual questions on '? ' boundaries."""
    generated = gen_question(context)
    first_sequence = generated[0][0]
    return first_sequence.split('? ')
105
+
106
def creator(context):
    """Build question/answer pairs for *context*: generate questions, then
    answer each against the same context. Returns a list of formatted
    QA strings."""
    questions = create_string_for_generator(context)
    return [QA(question, context) for question in questions]

# Example usage on a paragraph split into sentences:
# sentences = main_text.split('.')
# creator(sent)
main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import generator
from generator import *


# CLI driver: read a paragraph from stdin and run QA-pair generation on it.
context = str(input("Enter paragraph\n"))
# Bug fix: str.replace returns a new string; the original discarded the
# result, so newlines were never actually flattened.
context = context.replace("\n", " ")
# NOTE(review): the returned pairs are not printed or stored — matches the
# original behavior; consider printing the result of creator().
creator(context)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.24.1
2
+ streamlit==1.16.0
3
+ tensorflow==2.11.0
4
+ torch==1.13.1
5
+ transformers==4.25.1
stream_lib.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import generator
from generator import *


# Streamlit front-end: paste a paragraph, click "Generate!", and display
# one QA pair per line.
text_ar = st.text_area("Enter text:")

if st.button("Generate!"):
    # Flatten the pasted text to a single line before QA-pair generation.
    text_ar = text_ar.replace("\n", " ")
    for qa_pair in creator(text_ar):
        st.text(qa_pair)