UPDATE: Web Crawler
- app.py +14 -0
- functions.py +1 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,8 +1,11 @@
 import io
+import re
 from functions import *
 from PyPDF2 import PdfReader
+from bs4 import BeautifulSoup
 from fastapi import FastAPI, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
+from langchain_community.document_loaders import RecursiveUrlLoader
 
 
 app = FastAPI(title = "ConversAI", root_path = "/api/v1")
@@ -47,6 +50,17 @@ async def addText(vectorstore: str, text: str):
     return addDocuments(text = text, vectorstore = vectorstore)
 
 
+@app.post("/addWebsite")
+async def addWebsite(vectorstore: str, websiteUrl: str):
+    def bs4_extractor(html: str) -> str:
+        soup = BeautifulSoup(html, "lxml")
+        return re.sub(r"\n\n+", "\n\n", soup.text).strip()
+    loader = RecursiveUrlLoader(websiteUrl, max_depth=2, timeout = 60, extractor=bs4_extractor)
+    docs = loader.load()
+    text = "\n\n".join([doc.page_content for doc in docs])
+    return addDocuments(text = text, vectorstore = vectorstore)
+
+
 @app.post("/answerQuery")
 async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
     return answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
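For a quick sanity check of the new route, here is a minimal client-side sketch (not part of the commit). The base URL and vectorstore name are placeholders, and it assumes the deployment really exposes the app under its root_path of /api/v1; both arguments are plain FastAPI query parameters, so they go in params rather than a JSON body.

import requests

BASE_URL = "http://localhost:7860/api/v1"  # placeholder; substitute the actual deployment URL

# Crawl the site to max_depth=2 and index the extracted text into the named vectorstore.
resp = requests.post(
    f"{BASE_URL}/addWebsite",
    params={"vectorstore": "demo-store", "websiteUrl": "https://example.com"},
)
print(resp.status_code, resp.json())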
functions.py
CHANGED
@@ -32,7 +32,7 @@ vectorEmbeddings = HuggingFaceEmbeddings(
     model_kwargs = model_kwargs,
     encode_kwargs = encode_kwargs
 )
-sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25",
+sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25", parallel = 2)
 prompt = """
 ### Role
 - **Primary Function**: You are an AI chatbot dedicated to assisting users with their inquiries, issues, and requests. Your goal is to deliver excellent, friendly, and efficient responses at all times. Listen attentively, understand user needs, and provide the best assistance possible or direct them to appropriate resources. If a question is unclear, ask for clarification. Always conclude your replies on a positive note.
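For context on the changed line, here is a small standalone sketch (not from the Space) of what the sparse embedder does by itself. It assumes the FastEmbedSparse wrapper from langchain_qdrant, whose keyword is typically model_name rather than the model= used above, and whose parallel argument enables data-parallel encoding in fastembed; treat both names as version-dependent assumptions.

from langchain_qdrant import FastEmbedSparse

# parallel=2 asks fastembed to encode with two worker processes (useful for large batches).
sparse = FastEmbedSparse(model_name="Qdrant/bm25", parallel=2)

vec = sparse.embed_query("hybrid retrieval mixes BM25-style sparse scores with dense vectors")
# Sparse vectors are stored as matching index/value arrays instead of a dense list.
print(len(vec.indices), len(vec.values))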
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+bs4
 huggingface-hub
 fastapi
 fastembed-gpu
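One dependency note: the new bs4_extractor in app.py parses with BeautifulSoup(html, "lxml"), which needs the lxml package at runtime. If lxml is not already pinned further down in requirements.txt, a small guard like this hypothetical one would let the crawler fall back to the standard-library parser instead of raising bs4.FeatureNotFound:

try:
    import lxml  # noqa: F401  # preferred, faster HTML parser for BeautifulSoup
    HTML_PARSER = "lxml"
except ImportError:
    HTML_PARSER = "html.parser"  # stdlib fallback, always available

# bs4_extractor would then call BeautifulSoup(html, HTML_PARSER) instead of hard-coding "lxml".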