Spaces:
Sleeping
Sleeping
UPDATE: web crawler
Browse files
- Dockerfile +1 -1
- app.py +1 -1
- secrets.env +2 -1
Dockerfile
CHANGED
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \
|
|
12 |
&& apt-get clean \
|
13 |
&& rm -rf /var/lib/apt/lists/*
|
14 |
|
15 |
-
RUN pip install -r requirements.txt
|
16 |
|
17 |
EXPOSE 7860
|
18 |
|
|
|
12 |
&& apt-get clean \
|
13 |
&& rm -rf /var/lib/apt/lists/*
|
14 |
|
15 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
16 |
|
17 |
EXPOSE 7860
|
18 |
|
app.py
CHANGED
@@ -50,7 +50,7 @@ async def addText(vectorstore: str, text: str):
|
|
50 |
|
51 |
@app.post("/addWebsite")
|
52 |
async def addWebsite(vectorstore: str, websiteUrl: str):
|
53 |
-
urls = getLinks(
|
54 |
loader = UnstructuredURLLoader(urls=urls)
|
55 |
docs = loader.load()
|
56 |
text = "\n\n\n\n".join([f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
|
|
50 |
|
51 |
@app.post("/addWebsite")
|
52 |
async def addWebsite(vectorstore: str, websiteUrl: str):
|
53 |
+
urls = getLinks(websiteUrl)
|
54 |
loader = UnstructuredURLLoader(urls=urls)
|
55 |
docs = loader.load()
|
56 |
text = "\n\n\n\n".join([f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
secrets.env
CHANGED
@@ -3,4 +3,5 @@ SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZi
|
|
3 |
GROQ_API_KEY=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
|
4 |
QDRANT_URL=https://baeef19e-8f9f-4b14-b95f-45946d6fe1e6.us-east4-0.gcp.cloud.qdrant.io:6333
|
5 |
QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
|
6 |
-
COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
|
|
|
|
3 |
GROQ_API_KEY=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
|
4 |
QDRANT_URL=https://baeef19e-8f9f-4b14-b95f-45946d6fe1e6.us-east4-0.gcp.cloud.qdrant.io:6333
|
5 |
QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
|
6 |
+
COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
|
7 |
+
NLTK_DATA=/app/nltk_data
|