import os

import torch
from peft import PeftConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

PEFT_MODEL = 'deedax/falcon-7b-personal-assistant'

config = PeftConfig.from_pretrained(PEFT_MODEL)

# Load the base model in 4-bit NF4 with double quantization so it fits on a
# single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)
model.config.use_cache = True  # enable the KV cache; False is only needed during training

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.do_sample = True  # required for temperature/top_p to take effect
generation_config.temperature = 0.1
generation_config.top_p = 0.3
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id


def generate_response(question: str) -> str:
    prompt = f'''
Below is a conversation between an interviewer and a candidate. You are Dahiru Ibrahim, the candidate.
Your contact details are as follows:
github: https://github.com/Daheer
youtube: https://www.youtube.com/@deedaxinc
linkedin: https://linkedin.com/in/daheer-deedax
huggingface: https://huggingface.co/deedax
email: suhayrid6@gmail.com
phone: +2348147116750
Provide very SHORT, CONCISE, DIRECT and ACCURATE answers to the interview questions.
You do not respond as 'Interviewer' or pretend to be 'Interviewer'. You only respond ONCE as Candidate.

Interviewer: {question}
Candidate:
'''.strip()
    encoding = tokenizer(prompt, return_tensors='pt').to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text the model generated after the 'Candidate:' marker.
    assistant_start = 'Candidate:'
    response_start = response.find(assistant_start)
    return response[response_start + len(assistant_start):].strip()


# --- Streamlit chat UI ---
import streamlit as st

st.title("💬 Deedax Chat (Falcon-7B-Instruct)")

if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "Ask me anything about Dahiru!"}
    ]

# Replay the conversation so far.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    msg = {"role": "assistant", "content": str(generate_response(prompt))}
    st.session_state.messages.append(msg)
    st.chat_message("assistant").write(msg["content"])
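# ---------------------------------------------------------------------------
# Usage note: launch the chat UI with `streamlit run app.py` (the filename
# app.py is an assumption; the original does not name the script). Streamlit
# serves the app at http://localhost:8501 by default.
#
# Streamlit reruns the entire script on every user interaction, so as written
# the 4-bit base model and LoRA adapter are reloaded on each message. A
# minimal sketch of loading them once with st.cache_resource is shown below;
# the helper name load_model is hypothetical, not part of the original code.
#
# @st.cache_resource
# def load_model():
#     config = PeftConfig.from_pretrained(PEFT_MODEL)
#     base = AutoModelForCausalLM.from_pretrained(
#         config.base_model_name_or_path,
#         quantization_config=bnb_config,
#         device_map='auto',
#         trust_remote_code=True,
#     )
#     tok = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
#     tok.pad_token = tok.eos_token
#     return PeftModel.from_pretrained(base, PEFT_MODEL), tok
#
# model, tokenizer = load_model()
# ---------------------------------------------------------------------------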