Spaces:
Running
Running
| from langchain_text_splitters import CharacterTextSplitter | |
| from langchain_core.documents import Document # Added import for Document | |
| from sentence_transformers import SentenceTransformer, util | |
| import pandas as pd | |
class rag_text_chooser:
    """Pick the stored question/answer pair that best matches a user query.

    Questions and answers are loaded from a spreadsheet, every question is
    embedded once with a multilingual sentence-transformer model, and each
    incoming query is compared against those embeddings by cosine similarity.
    """

    def __init__(self, data_rag):
        """Load the Q/A sheet and pre-compute an embedding for every question.

        Args:
            data_rag: Whatever ``pandas.read_excel`` accepts as its first
                argument. The first column is read as the questions and the
                second column as the matching answers.
        """
        self.data_rag = pd.read_excel(data_rag)
        self.corpus, self.answers = self.get_questions_Answers()
        self.model = SentenceTransformer(
            'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
        )
        # Encode the corpus once here so a lookup only has to encode the query.
        self.corpus_embeddings = self.model.encode(self.corpus)

    def get_questions_Answers(self):
        """Return the questions and answers held in ``self.data_rag``.

        Columns are addressed by position, not by header name: column 0 holds
        the questions and column 1 the answers. Any further columns are
        ignored.

        Returns:
            A ``(questions, answers)`` tuple of two equally long lists, in
            sheet order.

        Raises:
            IndexError: If the sheet has fewer than two columns.
        """
        # Whole-column extraction replaces a Python-level row loop and avoids
        # ``iterrows`` coercing every row to a single common dtype.
        questions = self.data_rag.iloc[:, 0].tolist()
        answers = self.data_rag.iloc[:, 1].tolist()
        return questions, answers

    def get_relevant_question(self, query, threshold=0.7):
        """Return the best-matching stored Q/A pair for ``query``.

        Args:
            query: The user question to look up.
            threshold: Minimum cosine similarity the best candidate must
                reach to count as a match. Defaults to ``0.7``.

        Returns:
            A formatted string containing the matched question, its answer
            and the similarity score, or ``None`` when the corpus is empty or
            the best score is below ``threshold``.
        """
        # Nothing to compare against; bail out before touching the model.
        if not self.corpus:
            return None

        query_embedding = self.model.encode(query)
        # cos_sim returns a (1, n_questions) matrix; row 0 holds the scores
        # of this single query against every stored question.
        cos_scores = util.cos_sim(query_embedding, self.corpus_embeddings)[0]

        # Only the highest score can satisfy the threshold, so a single
        # argmax replaces sorting and scanning the entire score vector.
        best_idx = int(cos_scores.argmax())
        best_score = float(cos_scores[best_idx])
        if best_score < threshold:
            return None

        return (f"Questions {self.corpus[best_idx]} \n Answer {self.answers[best_idx]} \n (score: {best_score:.4f})")