# NOTE: scraped from a Hugging Face Spaces page; the page status banner read
# "Spaces: Runtime error". Kept here as a comment so the file stays valid Python.
# Standard library
import json
import os
from pprint import pprint

# Third-party: quantization, data handling, model/adapter loading
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
# Restrict the process to the first GPU before CUDA is initialized by torch.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Hub repo holding the LoRA adapter; its PeftConfig records the base model id.
PEFT_MODEL = 'deedax/falcon-7b-personal-assistant'
config = PeftConfig.from_pretrained(PEFT_MODEL)

# Load the base model in 4-bit NF4 with double quantization, computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict = True,
    quantization_config = bnb_config,
    device_map = 'auto',
    trust_remote_code = True,  # Falcon ships custom modeling code on the Hub
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Falcon's tokenizer has no pad token; reuse EOS so batching/generation works.
tokenizer.pad_token = tokenizer.eos_token

# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)
model.config.use_cache = False

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Low temperature / top_p for short, deterministic-leaning interview answers.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.1
generation_config.top_p = 0.3
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
def generate_response(question: str) -> str:
    """Answer an interview question in the persona of Dahiru Ibrahim.

    Builds a fixed persona prompt ending in 'Candidate:', generates a
    continuation with the LoRA-adapted model, and returns only the text
    produced after that marker.

    Args:
        question: The interviewer's question, interpolated into the prompt.

    Returns:
        The model's answer with surrounding whitespace stripped.
    """
    prompt = f'''
Below is a conversation between an interviewer and a candidate, You are Dahiru Ibrahim, the candidate.
Your contact details are as follows
github:https://github.com/Daheer
youtube:https://www.youtube.com/@deedaxinc
linkedin:https://linkedin.com/in/daheer-deedax
huggingface:https://huggingface.co/deedax
email:suhayrid6@gmail.com
phone:+2348147116750
Provide very SHORT, CONCISE, DIRECT and ACCURATE answers to the interview questions.
You do not respond as 'Interviewer' or pretend to be 'Interviewer'. You only respond ONCE as Candidate.
Interviewer: {question}
Candidate:
'''.strip()
    encoding = tokenizer(prompt, return_tensors = 'pt').to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids = encoding.input_ids,
            attention_mask = encoding.attention_mask,
            generation_config = generation_config,
        )
    # The decoded text echoes the prompt; keep only what follows 'Candidate:'.
    response = tokenizer.decode(outputs[0], skip_special_tokens = True)
    assistant_start = 'Candidate:'
    response_start = response.find(assistant_start)
    if response_start == -1:
        # Defensive: the marker ends the prompt so it should always be present,
        # but a -1 from find() would otherwise slice from a bogus offset.
        return response.strip()
    return response[response_start + len(assistant_start):].strip()
import streamlit as st
import random

st.title("💬 Deedax Chat (Falcon-7B-Instruct)")

# Seed the conversation with a greeting on first load of the session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Ask me anything about Dahiru!"}]

# Replay the stored conversation on every rerun.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    # NOTE(review): history is cleared on every turn, so only the latest
    # exchange survives to the next rerun — confirm this is intended.
    st.session_state.messages = []
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    # Fix: role was 'message', which did not match the 'assistant' role used
    # by the replay loop above (st.chat_message would get an unknown role).
    msg = {'role': 'assistant', 'content': str(generate_response(prompt))}
    st.session_state.messages.append(msg)
    st.chat_message("assistant").write(msg['content'])