Spaces:

jonathanjordan21
/

chat_with_me

Running

App Files Files Community

chat_with_me / custom_llm.py

jonathanjordan21

Update custom_llm.py

8675f62 verified 10 months ago

raw

history blame

5.26 kB

	from typing import Any, List, Mapping, Optional

	from langchain_core.callbacks.manager import CallbackManagerForLLMRun
	from langchain_core.language_models.llms import LLM
	from typing import Literal
	import requests
	from langchain.prompts import PromptTemplate, ChatPromptTemplate
	from operator import itemgetter

	from langchain.memory import ChatMessageHistory, ConversationBufferMemory
	from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain_community.chat_models import ChatOpenAI
	from langchain_core.runnables import RunnableLambda, RunnablePassthrough
	from langchain_core.messages import AIMessage, HumanMessage

	from langchain_community.document_loaders import DirectoryLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyMuPDFLoader
	import os
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS


	def create_vectorstore():
	loader = os.getenv('knowledge_base')

	splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=20)

	docs = splitter.create_documents([loader])

	emb_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2', encode_kwargs={'normalize_embeddings': True})
	db = FAISS.from_documents(docs, emb_model)
	return db

	def custom_chain_with_history(llm, memory):

	prompt = PromptTemplate.from_template("""<s><INST><\|system\|>
	You are a college student at Gunadarma University, majoring in Informatics / Computer Science. Your name is Jonathan Jordan, but people call you Jojo. You have expertise in Artificial Intelligence and Data Science. You are also good at Data Engineering with AirFlow and Apache Spark. You currently live in Tegal Alur, Kalideres, West Jakarta, Jakarta, Indonesia. You are 21 years old. Your birthday is Friday, 21 June 2002.

	Below are the detailed personal information about you:
	{context}

	Respond to the user with the following chat history between you and the user, if there is no relevant information that means you regard the information as a secret and you are not willing to share the information to the user:
	{chat_history}
	<\|user\|>
	{question}
	<\|you\|>
	""")

	def prompt_memory(memory):
	t = ""
	for x in memory.chat_memory.messages:
	# for x in memory.messages:
	t += f"<\|you\|>\n<s>{x.content}</s>\n" if type(x) is AIMessage else f"<\|user\|>\n{x.content}\n"
	return "" if len(t) == 0 else t

	def format_docs(docs):
	print(len(docs))
	return "\n".join([f"{i+1}. {d.page_content}" for i,d in enumerate(docs)])

	# prompt = ChatPromptTemplate.from_messages(
	# [
	# ("system", "You are a helpful chatbot"),
	# MessagesPlaceholder(variable_name="history"),
	# ("human", "{input}"),
	# ]
	# )

	# return {"chat_history":lambda x:, "context":create_vectorstore().as_retriever(search_type="similarity", search_kwargs={"k": 8}) \| format_docs, "question": RunnablePassthrough()} \| prompt \| llm
	return {"chat_history":lambda x:prompt_memory(x['memory']), "context":itemgetter("question") \| create_vectorstore().as_retriever(search_type="similarity", search_kwargs={"k": 8}) \| format_docs, "question": lambda x:x['question']} \| prompt \| llm

	class CustomLLM(LLM):
	repo_id : str
	api_token : str
	model_type: Literal["text2text-generation", "text-generation"]
	max_new_tokens: int = None
	temperature: float = 0.001
	timeout: float = None
	top_p: float = None
	top_k : int = None
	repetition_penalty : float = None
	stop : List[str] = []


	@property
	def _llm_type(self) -> str:
	return "custom"

	def _call(
	self,
	prompt: str,
	stop: Optional[List[str]] = None,
	run_manager: Optional[CallbackManagerForLLMRun] = None,
	**kwargs: Any,
	) -> str:

	headers = {"Authorization": f"Bearer {self.api_token}"}
	API_URL = f"https://api-inference.huggingface.co/models/{self.repo_id}"

	parameters_dict = {
	'max_new_tokens': self.max_new_tokens,
	'temperature': self.temperature,
	'timeout': self.timeout,
	'top_p': self.top_p,
	'top_k': self.top_k,
	'repetition_penalty': self.repetition_penalty,
	'stop':self.stop
	}

	if self.model_type == 'text-generation':
	parameters_dict["return_full_text"]=False

	data = {"inputs": prompt, "parameters":parameters_dict, "options":{"wait_for_model":True}}
	data = requests.post(API_URL, headers=headers, json=data).json()
	return data[0]['generated_text']

	@property
	def _identifying_params(self) -> Mapping[str, Any]:
	"""Get the identifying parameters."""
	return {
	'repo_id': self.repo_id,
	'model_type':self.model_type,
	'stop_sequences':self.stop,
	'max_new_tokens': self.max_new_tokens,
	'temperature': self.temperature,
	'timeout': self.timeout,
	'top_p': self.top_p,
	'top_k': self.top_k,
	'repetition_penalty': self.repetition_penalty
	}