talktopdf / app.py
naman4jain's picture
Update app.py
e204e2c
# -*- coding: utf-8 -*-
"""Copy of Talktopdf.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/162wknxpDdqaKCIWwAwA1k_zl1UIRwPuU
"""
# Importing modules
import requests
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain.document_loaders import UnstructuredPDFLoader
# Download the source PDF, split it into chunks, embed the chunks and build
# the FAISS index that answers similarity queries.
import os
import tempfile

file = requests.get(
    "https://huggingface.co/spaces/naman4jain/talktopdf/resolve/main/cricket_tutorial.pdf",
    timeout=60,
)
# Fail fast on HTTP errors instead of trying to parse an error page as a PDF.
file.raise_for_status()

# UnstructuredPDFLoader reads from a file path, so the downloaded bytes are
# written to a temporary file first; the directory is removed once the
# document has been loaded into memory.
with tempfile.TemporaryDirectory() as tmp_dir:
    pdf_path = os.path.join(tmp_dir, "cricket_tutorial.pdf")
    with open(pdf_path, "wb") as pdf_file:
        pdf_file.write(file.content)
    loader = UnstructuredPDFLoader(pdf_path)
    document = loader.load()

# Split into chunks of at most 500 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    separators=[" ", ",", "\n"],
)
docs = text_splitter.split_documents(document)

# Embed every chunk and index the vectors for similarity search.
embedding = HuggingFaceEmbeddings()
db = FAISS.from_documents(docs, embedding)
import os

# SECURITY: the Hugging Face Hub token must never be committed to source.
# It is read from the environment (for example a Space secret named
# HUGGINGFACEHUB_API_TOKEN); start-up fails with a clear message if it is
# missing rather than failing later inside the first model call.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; configure it as an environment "
        "variable or Space secret before starting the app."
    )

# Hub-hosted model client and a "stuff" question-answering chain built on it.
llm = HuggingFaceHub(
    repo_id="ashishkat/questionAnswer",
    model_kwargs={"temperature": 0.2, "max_length": 256},
)
chain = load_qa_chain(llm, chain_type="stuff")

# Sample query carried over from the notebook: retrieve the chunks most
# similar to the question and run the chain on them. The return value of
# chain.run is discarded here.
query = "How many players are there in a team?"
docs = db.similarity_search(query)
chain.run(input_documents=docs, question=query)
import gradio as gr
# Model client and "stuff" QA chain; `chain` is what run_gradio calls.
llm = HuggingFaceHub(
    repo_id="ashishkat/questionAnswer",
    model_kwargs={
        "temperature": 0.2,
        "max_length": 256,
    },
)
chain = load_qa_chain(
    llm,
    chain_type="stuff",
)
def run_gradio(query):
    """Answer a question using the indexed document.

    Looks up the chunks most similar to ``query`` with
    ``db.similarity_search`` and passes them, together with the question,
    to ``chain.run``.

    Args:
        query: Question text entered in the Gradio text box.

    Returns:
        The value returned by ``chain.run``, or a short prompt asking for
        input when ``query`` is empty, ``None`` or only whitespace.
    """
    # Skip the vector search and the remote model call for blank input.
    if not query or not query.strip():
        return "Please enter a question."

    docs = db.similarity_search(query)
    return chain.run(input_documents=docs, question=query)
# Wire run_gradio into a text-in / text-out Gradio interface and start it.
iface = gr.Interface(
    fn=run_gradio,
    inputs="text",
    outputs="text",
)
iface.launch()