# Third-party dependencies: langchain for PDF loading/splitting,
# llama-cpp-python for local GGUF model inference.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_cpp import Llama
def extract_text_from_pdf(pdf_path):
    """Extract the ABSTRACT-to-REFERENCES span of a research-paper PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        A tuple ``(text, length)`` where ``text`` is the slice of the
        document between the first literal occurrence of "ABSTRACT" and
        the first occurrence of "REFERENCES" (or a fixed explanatory
        message when those markers cannot be located in order), and
        ``length`` is ``len(text)``.
    """
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()
    all_text = " ".join(page.page_content for page in pages)

    # Locate the paper body via the literal section markers.
    # NOTE(review): this assumes the headings appear in upper case
    # exactly as "ABSTRACT" / "REFERENCES" — confirm against input PDFs.
    start_index = all_text.find("ABSTRACT")
    end_index = all_text.find("REFERENCES")
    if start_index != -1 and end_index != -1 and start_index < end_index:
        relevant_text = all_text[start_index:end_index]
    else:
        # Preserve the original sentinel message for callers that check it.
        relevant_text = "Unable to locate the specified sections in the document."

    # BUG FIX: the original split `relevant_text` into 250-char chunks with
    # a 50-char overlap and then re-joined the chunks with "". The overlap
    # duplicated every boundary region in the returned text and inflated
    # the reported length. The split/re-join round-trip added nothing, so
    # the extracted span is returned directly.
    return relevant_text, len(relevant_text)
def load_llm_model():
    """Fetch and initialize the Llama 3.2 1B Instruct GGUF model.

    Returns:
        An initialized ``llama_cpp.Llama`` instance.

    Raises:
        Exception: any error raised by ``Llama.from_pretrained`` is
            reported to stdout and re-raised unchanged.
    """
    try:
        model = Llama.from_pretrained(
            repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
            filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
            n_ctx=50000,
            n_batch=16384,
            verbose=False,
        )
        # Success message printed only after the constructor returned.
        print("LLM model loaded successfully")
        return model
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise