Spaces:

yixin6178
/

ChatPaper

Build error

App Files Files Community

ChatPaper / base_class.py

yiyixin

upload

bb4d908 over 1 year ago

raw

history blame

3.1 kB

	import abc
	import pandas as pd
	import pickle


	class SimilarityAlg(metaclass=abc.ABCMeta):
	"""Similarity Algorithm to compute similarity between query_embedding and embeddings"""

	def __init__(self) -> None:
	pass

	@abc.abstractmethod
	def __call__(self, query_embedding, embeddings) -> None:
	pass


	class Embedding_Model(metaclass=abc.ABCMeta):
	"""Embedding Model to compute embedding of a text"""

	def __init__(self, model_name) -> None:
	"""Initialize the embedding model"""
	embedding_cache_path = f"/app/ckpt/embedding_cache_{model_name}.pkl"
	self.embedding_cache_path = embedding_cache_path

	# load the cache if it exists, and save a copy to disk
	try:
	embedding_cache = pd.read_pickle(embedding_cache_path)
	except FileNotFoundError:
	embedding_cache = {}
	with open(embedding_cache_path, "wb") as embedding_cache_file:
	pickle.dump(embedding_cache, embedding_cache_file)
	self.embedding_cache = embedding_cache
	self.model_name = model_name

	@abc.abstractmethod
	def __call__(self, text) -> None:
	"""Compute the embedding of the text"""
	pass


	class AbstractPDFParser(metaclass=abc.ABCMeta):
	""" PDF parser to parse a PDF file"""

	def __init__(self, db_name) -> None:
	"""Initialize the pdf database"""
	db_cache_path = f"/app/ckpt/pdf_parser_{db_name}.pkl"
	self.db_cache_path = db_cache_path

	# load the cache if it exists, and save a copy to disk
	try:
	db_cache = pd.read_pickle(db_cache_path)
	except FileNotFoundError:
	db_cache = {}
	with open(db_cache_path, "wb") as cache_file:
	pickle.dump(db_cache, cache_file)
	self.db_cache = db_cache
	self.db_name = db_name

	@abc.abstractmethod
	def parse_pdf(self,) -> None:
	"""Parse the PDF file"""
	pass

	@abc.abstractmethod
	def _get_metadata(self, ) -> None:
	"""Get the metadata of the PDF file"""
	pass

	def get_paragraphs(self, ) -> None:
	"""Get the paragraphs of the PDF file"""
	pass

	@abc.abstractmethod
	def get_split_paragraphs(self, ) -> None:
	"""
	Get the split paragraphs of the PDF file
	Return:
	split_paragraphs: dict of metadata and corresponding list of split paragraphs
	"""
	pass

	def _determine_metadata_of_paragraph(self, paragraph) -> None:
	"""
	Determine the metadata of a paragraph
	Return:
	metadata: metadata of the paragraph
	"""
	pass

	@abc.abstractmethod
	def _determine_optimal_split_of_pargraphs(self, ) -> None:
	"""
	Determine the optimal split of paragraphs
	Return:
	split_paragraphs: dict of metadata and corresponding list of split paragraphs
	"""
	pass


	class ChatbotEngine(metaclass=abc.ABCMeta):
	def __init__(self,) -> None:
	pass

	@abc.abstractmethod
	def query(self, user_query):
	pass