# Mangacoder007/Llama-lite-finetune
# virtual env qna-with-se

import os

import numpy as np
from matplotlib import pyplot as plt

# Streamlit Libraries
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space

# PDF Function Libraries
from PyPDF2 import PdfReader
from fpdf import FPDF

# Image Processing Libraries
import cv2
import pytesseract
from PIL import Image

# Langchain Models and Embeddings Libraries
from langchain.callbacks import get_openai_callback
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Langchain Text Processing Libraries
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain

# Vector sentence computation library
import sentence_transformers

# Langchain Vector Storage processing Libraries
# import pickle
from langchain.vectorstores import FAISS

HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Model ID options:
# ---"NousResearch/Llama-2-7b-chat-hf"
# ---"Mangacoder007/Llama-lite-finetune"
# ---"Mangacoder007/Llama-2-7b-chat-finetune"
# ---"google/flan-t5-large"
# repo_id = "google/flan-t5-large"
repo_id = "Mangacoder007/Llama-lite-finetune"


def _extract_pdf_text(pdf_source):
    """Return the concatenated text of every page of a PDF (path or file-like).

    extract_text() may return None for pages with no extractable text,
    so substitute an empty string to avoid a TypeError on concatenation.
    """
    reader = PdfReader(pdf_source)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _answer_questions(text, widget_key):
    """Chunk *text*, embed it into a FAISS store, and answer the user's query.

    widget_key makes each st.text_input unique — without it, uploading both
    a PDF and an image creates two widgets with identical labels and
    Streamlit raises DuplicateWidgetID.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_text(text=text)

    # Creating and Storing Embeddings
    embeddings = HuggingFaceEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file:", key=widget_key)

    # Displaying Results from Model's Query Engine
    if query:
        docs = vector_store.similarity_search(query=query, k=3)
        llm = HuggingFaceHub(
            repo_id=repo_id,
            model_kwargs={"temperature": 0.75, "max_length": 512},
        )
        chain = load_qa_chain(llm=llm, chain_type="stuff")
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)
            print(cb)
        st.write(response)


def _preprocess_and_ocr(numpy_image):
    """Resize, grayscale and dilate *numpy_image*, then OCR the result.

    Intermediate images are written to the working directory for inspection,
    as the original pipeline did. Returns the text pytesseract extracts.
    """
    # Resizing the image
    resized = cv2.resize(numpy_image, (800, 500))
    cv2.imwrite("resized_img.jpg", resized)

    # Grayscale conversion
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    cv2.imwrite("gray.jpg", gray)

    # Thicken the font: invert, dilate so the strokes grow, invert back.
    inverted = cv2.bitwise_not(gray)
    kernel = np.ones((1, 1), np.uint8)
    dilated = cv2.bitwise_not(cv2.dilate(inverted, kernel, iterations=1))
    cv2.imwrite("dilated_image.jpg", dilated)

    return pytesseract.image_to_string(Image.open("dilated_image.jpg"))


def _text_to_pdf(text, output_file):
    """Write *text* into a simple single-font PDF at *output_file*."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, txt=text)
    pdf.output(output_file)


def main():
    """Streamlit entry point: question-answering over an uploaded PDF or image."""
    # Setup Page Title & Icon
    st.header("Chat with PDF!")

    with st.sidebar:
        st.title('LLM Chat App 💬')
        st.markdown('''
        ## About
        This is a simple chat application using LLM (Large Language Model).
        The model used here uses:
        - [Streamlit](https://streamlit.io/)
        - [LangChain](https://python.langchain.com/)
        ''')
        add_vertical_space(5)
        st.write('Made by Aditya, Aditee :), Shaswata, Rupkatha and Sharanya')

    # upload a PDF file
    pdf = st.file_uploader("Upload your PDF", type='pdf')
    st.divider()
    img = st.file_uploader("Upload your Image", type=['png', 'jpeg', 'jpg'])

    # Check if the user has uploaded a PDF file
    if pdf is not None:
        st.write("PDF Uploaded!")
        # Extracting text from PDF using the PyPDF2 library
        text = _extract_pdf_text(pdf)
        _answer_questions(text, widget_key="pdf_query")

    # Check if the user has uploaded an Image file
    if img is not None:
        st.write("Image Uploaded!")
        # Image conversion to a numpy array for OpenCV processing
        numpy_array = np.array(Image.open(img))
        ocr_text = _preprocess_and_ocr(numpy_array)

        # Convert the extracted text to a PDF, then re-read it locally.
        # BUG FIX: the original re-read a hard-coded absolute Windows path
        # (C:\Users\KIIT\...) instead of the "output.pdf" it had just written.
        output_file = "output.pdf"
        _text_to_pdf(ocr_text, output_file)
        text = _extract_pdf_text(output_file)
        _answer_questions(text, widget_key="img_query")


if __name__ == '__main__':
    main()