0504ankitsharma committed
upload 9 files
- .gitattributes +1 -0
- Dockerfile +30 -0
- README.md +4 -5
- __pycache__/main.cpython-310.pyc +0 -0
- __pycache__/main.cpython-312.pyc +0 -0
- app.py +144 -0
- data/Data.docx +0 -0
- requirements.txt +150 -0
- vectors_db/index.faiss +3 -0
- vectors_db/index.pkl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+vectors_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,30 @@
# Use the official lightweight Python image as the base
FROM python:3.9-slim

# Keep Python from writing .pyc files and from buffering stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies (optional, adjust as needed)
RUN apt-get update && apt-get install -y \
    gcc \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file into the container
COPY requirements.txt /app/

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . /app/

# Expose the port FastAPI will run on
EXPOSE 7860

# Command to run the FastAPI application
# (the app object lives in app.py at the repo root, so the module path is "app:app")
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
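A minimal smoke test for the container, assuming it was built and started with something like `docker build -t thapargpt .` and `docker run -p 7860:7860 -e OPENAI_API_KEY=... thapargpt` (the image tag and this snippet are illustrative, not part of the commit):

# Hypothetical smoke test: verify the container answers on port 7860.
# Standard library only; the URL matches EXPOSE 7860 in the Dockerfile above.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.load(resp))  # expected: {"Hello": "World"}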
README.md
CHANGED
@@ -1,11 +1,10 @@
 ---
-title: Thapargpt
-emoji:
-colorFrom:
-colorTo:
+title: Thapargpt
+emoji: 🚀
+colorFrom: yellow
+colorTo: indigo
 sdk: docker
 pinned: false
-license: mit
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/main.cpython-310.pyc
ADDED
Binary file (5.05 kB)
__pycache__/main.cpython-312.pyc
ADDED
Binary file (6.89 kB)
app.py
ADDED
@@ -0,0 +1,144 @@
import os
import re
import time

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings


def clean_response(response):
    # Remove leading/trailing whitespace, including newlines
    cleaned = response.strip()

    # Remove any enclosing quotation marks
    cleaned = re.sub(r'^["\']+|["\']+$', '', cleaned)

    # Collapse runs of newlines into a single newline
    cleaned = re.sub(r'\n+', '\n', cleaned)

    # Remove any literal backslash-n sequences left in the text
    cleaned = cleaned.replace('\\n', '')

    return cleaned


app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

openai_api_key = os.environ.get('OPENAI_API_KEY')
llm = ChatOpenAI(
    api_key=openai_api_key,
    model_name="gpt-4-turbo-preview",  # or "gpt-3.5-turbo" for a more economical option
    temperature=0.7
)


@app.get("/")
def read_root():
    return {"Hello": "World"}


class Query(BaseModel):
    query_text: str


prompt = ChatPromptTemplate.from_template(
    """
You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer all queries related to TIET. Every response you provide should be relevant to the context of TIET. If a question falls outside of this context, please decline by stating, 'Sorry, I cannot help with that.' If you do not know the answer to a question, do not attempt to fabricate a response; instead, politely decline.
You may elaborate on your answers slightly to provide more information, but avoid sounding boastful or exaggerating. Stay focused on the context provided.
If the query is not related to TIET or falls outside the context of education, respond with:
"Sorry, I cannot help with that. I'm specifically designed to answer questions about the Thapar Institute of Engineering and Technology.
For more information, please contact us at our toll-free number 18002024100 or e-mail us at admissions@thapar.edu."
<context>
{context}
</context>
Question: {input}
"""
)


def vector_embedding():
    try:
        file_path = "./data/Data.docx"
        if not os.path.exists(file_path):
            print(f"The file {file_path} does not exist.")
            return {"response": "Error: Data file not found"}

        loader = DocxLoader(file_path)
        documents = loader.load()

        print(f"Loaded document: {file_path}")

        # Split the document into overlapping chunks for retrieval
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)

        print(f"Created {len(chunks)} chunks.")

        model_name = "BAAI/bge-base-en"
        encode_kwargs = {'normalize_embeddings': True}
        model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)

        # Build the FAISS index from the chunks and persist it to disk
        db = FAISS.from_documents(chunks, model_norm)
        db.save_local("./vectors_db")

        print("Vector store created and saved successfully.")
        return {"response": "Vector Store DB Is Ready"}

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return {"response": f"Error: {str(e)}"}


def get_embeddings():
    model_name = "BAAI/bge-base-en"
    encode_kwargs = {'normalize_embeddings': True}
    model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)
    return model_norm


@app.post("/chat")  # Changed from /anthropic to /chat
def read_item(query: Query):
    try:
        embeddings = get_embeddings()
        vectors = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)
    except Exception as e:
        print(f"Error loading vector store: {str(e)}")
        return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}

    prompt1 = query.query_text
    if prompt1:
        start = time.process_time()
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        response = retrieval_chain.invoke({'input': prompt1})
        print("Response time:", time.process_time() - start)

        # Apply the cleaning function to the response
        cleaned_response = clean_response(response['answer'])

        # For debugging, print the cleaned response
        print("Cleaned response:", repr(cleaned_response))

        return cleaned_response
    else:
        return "No Query Found"


@app.get("/setup")
def setup():
    return vector_embedding()


# Uncomment this to check if the API key is set
# print(f"API key set: {'Yes' if os.environ.get('OPENAI_API_KEY') else 'No'}")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
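A hypothetical client for the two endpoints app.py defines, shown as a sketch: `requests` is already pinned in requirements.txt, but the base URL and the sample question are assumptions for illustration.

# Hypothetical client: build the vector store, then ask a question.
import requests

BASE = "http://localhost:7860"  # assumed host/port, matching the Docker setup

# /setup rebuilds the FAISS vector store from data/Data.docx
print(requests.get(f"{BASE}/setup").json())

# /chat expects a body matching the Query model (a query_text field)
r = requests.post(f"{BASE}/chat", json={"query_text": "What programmes does TIET offer?"})
print(r.json())  # /chat returns the cleaned answer as a JSON string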
data/Data.docx
ADDED
Binary file (344 kB)
requirements.txt
ADDED
@@ -0,0 +1,150 @@
aiofiles==24.1.0
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
cachetools==5.5.0
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
click==8.1.7
cryptography==43.0.3
dataclasses-json==0.6.7
distro==1.9.0
docstring_parser==0.16
emoji==2.14.0
eval_type_backport==0.2.0
faiss-cpu==1.9.0.post1
fastapi==0.115.5
filelock==3.16.1
filetype==1.2.0
frozenlist==1.5.0
fsspec==2024.10.0
google-ai-generativelanguage==0.6.10
google-api-core==2.23.0
google-api-python-client==2.154.0
google-auth==2.36.0
google-auth-httplib2==0.2.0
google-cloud-aiplatform==1.73.0
google-cloud-bigquery==3.27.0
google-cloud-core==2.4.1
google-cloud-resource-manager==1.13.1
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-generativeai==0.8.3
google-resumable-media==2.7.2
googleapis-common-protos==1.66.0
greenlet==3.1.1
grpc-google-iam-v1==0.13.1
grpcio==1.68.0
grpcio-status==1.68.0
h11==0.14.0
html5lib==1.1
httpcore==1.0.7
httplib2==0.22.0
httpx==0.27.2
httpx-sse==0.4.0
huggingface-hub==0.26.2
idna==3.10
Jinja2==3.1.4
jiter==0.7.1
joblib==1.4.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
langchain==0.3.8
langchain-community==0.3.8
langchain-core==0.3.21
langchain-google-genai==2.0.5
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langdetect==1.0.9
langsmith==0.1.145
lxml==5.3.0
MarkupSafe==3.0.2
marshmallow==3.23.1
mpmath==1.3.0
multidict==6.1.0
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.4.2
nltk==3.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
olefile==0.47
openai==1.55.0
orjson==3.10.11
packaging==24.2
pillow==11.0.0
propcache==0.2.0
proto-plus==1.25.0
protobuf==5.28.3
psutil==6.1.0
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
pydantic==2.9.2
pydantic-settings==2.6.1
pydantic_core==2.23.4
PyMuPDF==1.24.14
pyparsing==3.2.0
pypdf==5.1.0
PyPDF2==3.0.1
python-dateutil==2.8.2
python-docx==1.1.2
python-dotenv==1.0.1
python-iso639==2024.10.22
python-magic==0.4.27
python-oxmsg==0.0.1
PyYAML==6.0.2
RapidFuzz==3.10.1
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
rsa==4.9
safetensors==0.4.5
scikit-learn==1.5.2
scipy==1.14.1
sentence-transformers==3.3.1
setuptools==75.6.0
shapely==2.0.6
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
SQLAlchemy==2.0.35
starlette==0.41.3
sympy==1.13.1
tenacity==9.0.0
threadpoolctl==3.5.0
tiktoken==0.8.0
tokenizers==0.20.3
torch==2.5.1
tqdm==4.67.0
transformers==4.46.3
triton==3.1.0
typing-inspect==0.9.0
typing_extensions==4.12.2
unstructured==0.16.6
unstructured-client==0.28.0
uritemplate==4.1.1
urllib3==2.2.3
uvicorn==0.32.1
webencodings==0.5.1
wrapt==1.17.0
yarl==1.18.0
vectors_db/index.faiss
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccd7b8fc38a26996f66c51ff0558b2cf1bc7a4e13143acec3be23b86c7d06607
size 3373101
vectors_db/index.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2eb5fee5010d291cd821f48562955973db26f26708e5d226ca03c624400ff13b
size 537595
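The two files above are Git LFS pointers for the prebuilt FAISS store. A minimal sketch for checking the shipped index offline (after `git lfs pull`), assuming the same BGE embeddings that app.py's get_embeddings() configures; the sample query is illustrative.

# Hypothetical offline check: load the committed index and run a similarity search.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en",
    encode_kwargs={"normalize_embeddings": True},
)
db = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)
for doc in db.similarity_search("admissions", k=2):  # sample query, chosen arbitrarily
    print(doc.page_content[:120])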