Update app.py
app.py CHANGED
@@ -1,58 +1,3 @@
-# from transformers import AutoTokenizer, AutoModelForCausalLM, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
-# import torch
-# import streamlit as st
-
-# st.write("im here")
-# # Load the pre-trained tokenizer and model
-# tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
-# model = AutoModelForCausalLM.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
-
-
-# dataa = "My name is youssef khemiri i am 21 years old and i am a data scientist"
-# st.write(dataa)
-
-# # Prepare the dataset
-# train_dataset = TextDataset(
-#     tokenizer=tokenizer,
-#     file_path=dataa,
-#     block_size=128,
-# )
-# st.write("hi1")
-# # Prepare the data collator
-# data_collator = DataCollatorForLanguageModeling(
-#     tokenizer=tokenizer, mlm=False,
-# )
-
-# # Initialize the trainer
-# training_args = TrainingArguments(
-#     output_dir='./results',          # output directory
-#     num_train_epochs=3,              # total number of training epochs
-#     per_device_train_batch_size=16,  # batch size per device during training
-#     save_steps=10_000,               # number of steps between saving checkpoints
-#     save_total_limit=2,              # limit the total amount of checkpoints to save
-#     prediction_loss_only=True,
-#     learning_rate=5e-5,
-# )
-# st.write("hi2")
-
-# trainer = Trainer(
-#     model=model,
-#     args=training_args,
-#     train_dataset=train_dataset,
-#     data_collator=data_collator,
-# )
-
-# # Fine-tune the model
-# trainer.train()
-# st.write("finished training")
-
-# # infer
-# inputs = tokenizer("<human>: Tell me about youssef khemiri\n<bot>:", return_tensors='pt').to(model.device)
-# outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
-# output_str = tokenizer.decode(outputs[0])
-# st.write(output_str)
-
-
 from transformers import pipeline
 import streamlit as st
 from streamlit_chat import message
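Note on the hunk below: it touches code that uses qa_model, context, and get_text(), all of which live in the unchanged middle of app.py and so do not appear in this diff. Given the three imports kept above, those definitions presumably look something like the sketch below; the pipeline task is the standard extractive-QA one, but the checkpoint, the context string, and the widget label are guesses for illustration. (Incidentally, the deleted experiment above passed the raw string dataa as TextDataset's file_path, which expects a path to a text file, so that code could not have run as written.)

    # Assumed shape of the unchanged code (not part of this diff).
    qa_model = pipeline("question-answering")  # loads a default extractive-QA checkpoint
    context = "My name is youssef khemiri i am 21 years old and i am a data scientist"  # plausible context, reused from the deleted code

    def get_text():
        # A single text box whose value persists across Streamlit reruns.
        return st.text_input("You: ", key="input")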
@@ -101,8 +46,8 @@ question = get_text()
 # while True:

 # Generate the answer using the model
-answer = qa_model(question=question, context=context)
 message(question,is_user=True)
+answer = qa_model(question=question, context=context)
 message(answer)

 # Print the answer