web_scrap / app.py
vishnu23's picture
Update app.py
68a7260
raw
history blame contribute delete
No virus
775 Bytes
import gradio as gr
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
def scrap(urls):
    """Load a single web page and return its content split into chunks.

    Despite the plural name, ``urls`` is one URL string (the value of the
    Gradio text input); it is wrapped in a one-element list for the loader.

    Args:
        urls: URL of the page to load. Leading/trailing whitespace is
            ignored.

    Returns:
        The list returned by ``CharacterTextSplitter.split_documents`` for
        the loaded page, or an empty list when ``urls`` is empty or blank.
    """
    # Pasted URLs frequently carry stray spaces or a trailing newline.
    url = (urls or "").strip()
    if not url:
        # Nothing to fetch: skip the loader rather than requesting "".
        return []

    loader = UnstructuredURLLoader(urls=[url])
    pages = loader.load()

    # Split on newlines using chunk_size=1000 with chunk_overlap=200.
    splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=1000,
        chunk_overlap=200,
    )
    return splitter.split_documents(pages)
# Gradio UI: one text input whose value is passed to scrap(), and one text
# output that shows whatever scrap() returns.
iface = gr.Interface(
    fn=scrap,
    inputs="text",
    outputs=['text'],
    title='WebScrap',
    description="Get content of the website from given website URL",
)

# Start the app.
iface.launch(inline=False)