avanish07 committed on
Commit
baaf428
1 Parent(s): 9cd67df

Upload 4 files

Browse files
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from string import Template
5
+ import streamlit as st
6
+ import base64
7
+ from datasets import load_dataset
8
+ from datasets import Dataset
9
+ import torch
10
+ from tqdm import tqdm
11
+ from peft import LoraConfig, get_peft_model
12
+
13
+ import transformers
14
+ # from transformers import AutoModelForCausalLM, AdapterConfig
15
+ from transformers import AutoConfig,AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
16
+ from transformers import TrainingArguments
17
+
18
+ from peft import LoraConfig
19
+ from peft import *
20
+
21
+ from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
22
+ from langchain.prompts import PromptTemplate
23
+
24
+ from IPython.display import Markdown, display
25
+
26
# Location of the fine-tuned adapter (weights + adapter config).
peft_model_id = "Fine_Tuned"
# The adapter config names the base model the adapter is applied to.
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the quantized base model with its adapter, plus the tokenizer.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the result as a resource means the model is loaded once per
    server process instead of once per click.

    Returns:
        A ``(model, tokenizer)`` tuple: the adapter-wrapped causal LM and the
        base model's tokenizer with its pad token set to the EOS token.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    tok = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # Reuse EOS as the padding token.
    tok.pad_token = tok.eos_token
    return PeftModel.from_pretrained(base_model, peft_model_id), tok


model, tokenizer = _load_model_and_tokenizer()
48
+
49
# Instruction prompt for a five-option multiple choice question.
# NOTE(review): the text is kept verbatim, including the stray apostrophe after
# "correct." and the explicit "\n" followed by a real newline (which yields a
# blank line after each option). Presumably the adapter was trained on this
# exact wording -- confirm before normalizing it.
prompt_template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A, B, C, D, E] \
in order of the most likely to be correct to the least likely to be correct.'

Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n

Answer: """

# Template object with one slot for the question and one per answer option.
prompt = PromptTemplate(
    input_variables=['prompt', 'a', 'b', 'c', 'd', 'e'],
    template=prompt_template,
)
62
+
63
+
64
def format_text_to_prompt(example):
    """Fill the module-level prompt template from one question record.

    Args:
        example: Mapping with the keys ``'prompt'`` (question text) and
            ``'A'`` through ``'E'`` (the five answer options).

    Returns:
        A dict with the single key ``"ans"`` holding the formatted prompt.
    """
    question = example['prompt']
    # Template slots are the lowercase forms of the record's option keys.
    option_slots = {letter.lower(): example[letter] for letter in "ABCDE"}
    return {"ans": prompt.format(prompt=question, **option_slots)}
72
+
73
def get_ans(text):
    """Return the three answer letters the model scores highest for ``text``.

    The model is run once on ``text``. The logits at the last input position
    are read for the token ids of ``' A'`` .. ``' E'`` (the last id each
    produces when tokenized), and the letters are ranked by those logits.

    Args:
        text: The prompt to score.

    Returns:
        A list of three letters from ``'A'``-``'E'``, best first.
    """
    inputs = tokenizer(text, return_tensors='pt')
    # Follow the model's own placement rather than hard-coding CUDA.
    device = model.device

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        logits = model(
            input_ids=inputs['input_ids'].to(device),
            attention_mask=inputs['attention_mask'].to(device),
        ).logits[0, -1]

    # (logit, letter) pairs; plain floats avoid comparing tensors while sorting.
    scored = [
        (logits[tokenizer(f' {letter}').input_ids[-1]].item(), letter)
        for letter in 'ABCDE'
    ]
    scored.sort(reverse=True)

    return [letter for _, letter in scored[:3]]
85
+
86
+
87
def get_base64_of_bin_file(bin_file):
    """Read ``bin_file`` as bytes and return its contents as base64 text.

    Args:
        bin_file: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, as a ``str``.
    """
    with open(bin_file, 'rb') as handle:
        encoded = base64.b64encode(handle.read())
    return encoded.decode()
91
+
92
def set_png_as_page_bg(png_file):
    """Inject CSS that sets the page and sidebar background images.

    The main view uses a fixed remote image URL. The sidebar uses
    ``png_file`` embedded as a base64 data URI. The header is made
    transparent and the toolbar is offset from the right edge.

    Args:
        png_file: Path of the image to embed as the sidebar background.
            Despite the parameter name, any image type works: the MIME type
            is guessed from the file name and falls back to ``image/png``.
    """
    import mimetypes

    img = get_base64_of_bin_file(png_file)
    # The caller passes a .jpg, so label the data URI with the real type.
    mime_type = mimetypes.guess_type(png_file)[0] or "image/png"
    page_bg_img = f"""
<style>
[data-testid="stAppViewContainer"] > .main {{
background-image: url("https://www.tata.com/content/dam/tata/images/verticals/desktop/banner_travel_umaidbhavan_desktop_1920x1080.jpg");
background-size: 200%;
background-position: center;
background-repeat: no-repeat;
background-attachment: local;
}}

[data-testid="stSidebar"] > div:first-child {{
background-image: url("data:{mime_type};base64,{img}");
background-position: center;
background-repeat: no-repeat;
background-attachment: fixed;
}}

[data-testid="stHeader"] {{
background: rgba(0,0,0,0);
}}

[data-testid="stToolbar"] {{
right: 2rem;
}}
</style>
"""

    # Raw HTML must be explicitly allowed for the <style> tag to be emitted.
    st.markdown(page_bg_img, unsafe_allow_html=True)
122
+
123
def get_base64_encoded_image(image_path):
    """Return the base64 text encoding of the file at ``image_path``.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as stream:
        raw_bytes = stream.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
127
+
128
+
129
def main():
    """Render the Sci-mcq-GPT page: backgrounds, sidebar, and the Q&A form.

    The sidebar shows a download link (an icon when the icon file is
    available, plain text otherwise) and an illustration. The main area has
    a question box and a "Get Answer" button in the left column, and the
    model's response in the right column.
    """
    set_png_as_page_bg("net_technology_5407.jpg")
    image_path = "artificial-intelligence.jpg"  # Replace with the actual image file path

    st.title("Sci-mcq-GPT")

    sidebar = st.sidebar

    link = "https://drive.google.com/file/d/1_2TqNNyoczhxIBmU7BpOzEi2bu3MC-sx/view?usp=sharing"
    icon_path = "pdf download logo.png"
    try:
        encoded_image = get_base64_encoded_image(icon_path)
    except FileNotFoundError:
        # A missing icon should not take the whole page down; use a text link.
        lnk = f'<a href="{link}">Download PDF</a>'
    else:
        lnk = f'<a href="{link}"><img src="data:image/png;base64,{encoded_image}" width="50" height="50"></a>'
    sidebar.markdown(lnk, unsafe_allow_html=True)

    st.subheader("Ask Q&A")
    col1, col2 = st.columns(2)
    query = col1.text_area("Enter your question")

    # NOTE(review): the raw query goes to the model without being put through
    # the module's prompt template -- confirm this is intended.
    response = ""
    if col1.button("Get Answer"):
        if query.strip():
            ans = get_ans(query)
            print(ans)
            # Show the ranked letters as text rather than a Python list repr.
            response = ", ".join(ans)
        else:
            # Do not run the model on an empty prompt.
            col1.warning("Please enter a question first.")

    col2.text_area("Sci-mcq-GPT Response", value=response)

    sidebar.image(image_path, caption=" ", width=300)


if __name__ == "__main__":
    main()
artificial-intelligence.jpg ADDED
net_technology_5407.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ bitsandbytes
3
+ einops
4
+ langchain
5
+ lvwerra-trl
6
+ torch
7
+ tqdm
8
+ transformers
9
+ wandb
10
+ streamlit