Spaces:
Running
Running
hetvaghasia39
committed on
Commit
•
f7afa35
1
Parent(s):
d365c26
running in docker
Browse files
- Dockerfile +13 -4
- main.py +2 -1
- pragetx_scraper/pragetx_scraper/spiders/pages.py +3 -3
- requirements.txt +1 -1
- setup.py +2 -1
- utils.py +1 -0
Dockerfile
CHANGED
@@ -1,4 +1,8 @@
|
|
1 |
-
FROM python
|
|
|
|
|
|
|
|
|
2 |
COPY requirements.txt /app/requirements.txt
|
3 |
WORKDIR /app
|
4 |
RUN pip install -r requirements.txt
|
@@ -7,8 +11,8 @@ RUN useradd -m -u 1000 user
|
|
7 |
USER user
|
8 |
ENV HOME=/home/user \
|
9 |
PATH=/home/user/.local/bin:$PATH
|
10 |
-
|
11 |
-
WORKDIR
|
12 |
RUN playwright install
|
13 |
|
14 |
COPY --chown=user . $HOME/app
|
@@ -17,5 +21,10 @@ RUN scrapy crawl pages && \
|
|
17 |
cd $HOME/app && \
|
18 |
python setup.py
|
19 |
WORKDIR $HOME/app
|
|
|
|
|
|
|
|
|
20 |
EXPOSE 7860
|
21 |
-
|
|
|
|
1 |
+
FROM python:3.12.0
|
2 |
+
# RUN apt-get update && apt-get install -y \
|
3 |
+
# sqlite3 \
|
4 |
+
# libsqlite3-dev \
|
5 |
+
# && rm -rf /var/lib/apt/lists/*
|
6 |
COPY requirements.txt /app/requirements.txt
|
7 |
WORKDIR /app
|
8 |
RUN pip install -r requirements.txt
|
|
|
11 |
USER user
|
12 |
ENV HOME=/home/user \
|
13 |
PATH=/home/user/.local/bin:$PATH
|
14 |
+
|
15 |
+
WORKDIR /app
|
16 |
RUN playwright install
|
17 |
|
18 |
COPY --chown=user . $HOME/app
|
|
|
21 |
cd $HOME/app && \
|
22 |
python setup.py
|
23 |
WORKDIR $HOME/app
|
24 |
+
RUN curl https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 -o frpc_linux_amd64_v0.2
|
25 |
+
USER root
|
26 |
+
RUN mv frpc_linux_amd64_v0.2 /usr/local/lib/python3.12/site-packages/gradio/
|
27 |
+
RUN chmod +x /usr/local/lib/python3.12/site-packages/gradio/frpc_linux_amd64_v0.2
|
28 |
EXPOSE 7860
|
29 |
+
USER user
|
30 |
+
CMD ["gradio", "main.py"]
|
main.py
CHANGED
@@ -67,7 +67,8 @@ Answer:
|
|
67 |
)
|
68 |
self.db = Chroma(persist_directory="./pragetx_chroma", embedding_function=HuggingFaceEmbeddings(), collection_name="pragetx")
|
69 |
# self.llm = ChatOllama(model="phi3:3.8b", base_url="http://localhost:11434", num_gpu=16)
|
70 |
-
self.llm = HuggingChat(email = os.getenv("HF_EMAIL") , psw = os.getenv("HF_PASS") )
|
|
|
71 |
self.chain = (
|
72 |
{"chat_history": self.chat_history, "context": self.db.as_retriever(search_kwargs={"k":3}), "question": RunnablePassthrough()} |
|
73 |
self.prompt |
|
|
|
67 |
)
|
68 |
self.db = Chroma(persist_directory="./pragetx_chroma", embedding_function=HuggingFaceEmbeddings(), collection_name="pragetx")
|
69 |
# self.llm = ChatOllama(model="phi3:3.8b", base_url="http://localhost:11434", num_gpu=16)
|
70 |
+
# self.llm = HuggingChat(email = os.getenv("HF_EMAIL") , psw = os.getenv("HF_PASS") )
|
71 |
+
self.llm = HuggingChat(email = "brij1808" , psw = "Brijesh321@R" )
|
72 |
self.chain = (
|
73 |
{"chat_history": self.chat_history, "context": self.db.as_retriever(search_kwargs={"k":3}), "question": RunnablePassthrough()} |
|
74 |
self.prompt |
|
pragetx_scraper/pragetx_scraper/spiders/pages.py
CHANGED
@@ -20,11 +20,11 @@ def md(html, **options):
|
|
20 |
|
21 |
class PagesSpider(scrapy.Spider):
|
22 |
name = "pages"
|
23 |
-
allowed_domains = ["
|
24 |
-
start_urls = ["https://
|
25 |
|
26 |
def start_requests(self):
|
27 |
-
url = "https://
|
28 |
print("Starting request")
|
29 |
print("URL", url)
|
30 |
yield scrapy.Request(url, meta={'playwright': True})
|
|
|
20 |
|
21 |
class PagesSpider(scrapy.Spider):
|
22 |
name = "pages"
|
23 |
+
allowed_domains = ["aadee.app"]
|
24 |
+
start_urls = ["https://aadee.app"]
|
25 |
|
26 |
def start_requests(self):
|
27 |
+
url = "https://aadee.app"
|
28 |
print("Starting request")
|
29 |
print("URL", url)
|
30 |
yield scrapy.Request(url, meta={'playwright': True})
|
requirements.txt
CHANGED
@@ -47,7 +47,7 @@ h11==0.14.0
|
|
47 |
httpcore==1.0.5
|
48 |
httptools==0.6.1
|
49 |
httpx==0.27.0
|
50 |
-
hugchat==0.4.
|
51 |
huggingface-hub==0.23.2
|
52 |
humanfriendly==10.0
|
53 |
hyperlink==21.0.0
|
|
|
47 |
httpcore==1.0.5
|
48 |
httptools==0.6.1
|
49 |
httpx==0.27.0
|
50 |
+
hugchat==0.4.10
|
51 |
huggingface-hub==0.23.2
|
52 |
humanfriendly==10.0
|
53 |
hyperlink==21.0.0
|
setup.py
CHANGED
@@ -31,4 +31,5 @@ for file in files:
|
|
31 |
# print(docs[idx].metadata)
|
32 |
docs[idx].metadata['url'] = url
|
33 |
print(docs[idx].metadata)
|
34 |
-
|
|
|
|
31 |
# print(docs[idx].metadata)
|
32 |
docs[idx].metadata['url'] = url
|
33 |
print(docs[idx].metadata)
|
34 |
+
if docs:
|
35 |
+
chroma.add_documents(docs)
|
utils.py
CHANGED
@@ -127,6 +127,7 @@ class HuggingChat(LLM):
|
|
127 |
return str(resp)
|
128 |
|
129 |
except Exception as e:
|
|
|
130 |
raise ValueError("ChatBot failed, please check your parameters. " + str(e))
|
131 |
|
132 |
@property
|
|
|
127 |
return str(resp)
|
128 |
|
129 |
except Exception as e:
|
130 |
+
print('e: ', e)
|
131 |
raise ValueError("ChatBot failed, please check your parameters. " + str(e))
|
132 |
|
133 |
@property
|