0504ankitsharma committed · verified
Commit 232f6b1 · 1 Parent(s): abfc48a

upload 9 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ vectors_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ # Use the official lightweight Python image as the base
+ FROM python:3.9-slim
+
+ # Set environment variables so Python skips .pyc files and does not buffer output
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Set the working directory inside the container
+ WORKDIR /app
+
+ # Install system dependencies (optional, adjust as needed)
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     libpq-dev \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file into the container
+ COPY requirements.txt /app/
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code into the container
+ COPY . /app/
+
+ # Expose the port FastAPI will run on
+ EXPOSE 7860
+
+ # Run the FastAPI application defined in app.py at the repository root
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
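A quick way to smoke-test the image locally is to build and run it (for example `docker build -t thapargpt .` followed by `docker run -p 7860:7860 -e OPENAI_API_KEY=... thapargpt`) and hit the root endpoint. The sketch below assumes that local setup and the requests library; the image tag and URL are illustrative, not part of this commit.

# Smoke test: the container should answer on localhost:7860 once it is up.
import requests

resp = requests.get("http://localhost:7860/", timeout=10)
print(resp.status_code, resp.json())  # expected: 200 {'Hello': 'World'}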
README.md CHANGED
@@ -1,11 +1,10 @@
  ---
- title: Thapargpt Openai
- emoji: 🌖
- colorFrom: blue
- colorTo: blue
+ title: Thapargpt
+ emoji: 🚀
+ colorFrom: yellow
+ colorTo: indigo
  sdk: docker
  pinned: false
- license: mit
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/main.cpython-310.pyc ADDED
Binary file (5.05 kB). View file
 
__pycache__/main.cpython-312.pyc ADDED
Binary file (6.89 kB). View file
 
app.py ADDED
@@ -0,0 +1,144 @@
+ import os
+ import re
+ from openai import OpenAI
+ from langchain_openai import ChatOpenAI
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains import create_retrieval_chain
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+ import time
+
+ def clean_response(response):
+     # Remove any leading/trailing whitespace, including newlines
+     cleaned = response.strip()
+
+     # Remove any enclosing quotation marks
+     cleaned = re.sub(r'^["\']+|["\']+$', '', cleaned)
+
+     # Replace multiple newlines with a single newline
+     cleaned = re.sub(r'\n+', '\n', cleaned)
+
+     # Remove any literal '\n' sequences left in the text
+     cleaned = cleaned.replace('\\n', '')
+
+     return cleaned
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ openai_api_key = os.environ.get('OPENAI_API_KEY')
+ llm = ChatOpenAI(
+     api_key=openai_api_key,
+     model_name="gpt-4-turbo-preview",  # or "gpt-3.5-turbo" for a more economical option
+     temperature=0.7
+ )
+
+ @app.get("/")
+ def read_root():
+     return {"Hello": "World"}
+
+ class Query(BaseModel):
+     query_text: str
+
+ prompt = ChatPromptTemplate.from_template(
+     """
+     You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer all queries related to TIET. Every response you provide should be relevant to the context of TIET. If a question falls outside of this context, please decline by stating, 'Sorry, I cannot help with that.' If you do not know the answer to a question, do not attempt to fabricate a response; instead, politely decline.
+     You may elaborate on your answers slightly to provide more information, but avoid sounding boastful or exaggerating. Stay focused on the context provided.
+     If the query is not related to TIET or falls outside the context of education, respond with:
+     "Sorry, I cannot help with that. I'm specifically designed to answer questions about the Thapar Institute of Engineering and Technology.
+     For more information, please contact us at our toll-free number 18002024100 or e-mail us at admissions@thapar.edu."
+     <context>
+     {context}
+     </context>
+     Question: {input}
+     """
+ )
+
+ def vector_embedding():
+     try:
+         file_path = "./data/Data.docx"
+         if not os.path.exists(file_path):
+             print(f"The file {file_path} does not exist.")
+             return {"response": "Error: Data file not found"}
+
+         loader = DocxLoader(file_path)
+         documents = loader.load()
+
+         print(f"Loaded document: {file_path}")
+
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+         chunks = text_splitter.split_documents(documents)
+
+         print(f"Created {len(chunks)} chunks.")
+
+         model_name = "BAAI/bge-base-en"
+         encode_kwargs = {'normalize_embeddings': True}
+         model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)
+
+         db = FAISS.from_documents(chunks, model_norm)
+         db.save_local("./vectors_db")
+
+         print("Vector store created and saved successfully.")
+         return {"response": "Vector Store DB Is Ready"}
+
+     except Exception as e:
+         print(f"An error occurred: {str(e)}")
+         return {"response": f"Error: {str(e)}"}
+
+ def get_embeddings():
+     model_name = "BAAI/bge-base-en"
+     encode_kwargs = {'normalize_embeddings': True}
+     model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)
+     return model_norm
+
+ @app.post("/chat")  # Changed from /anthropic to /chat
+ def read_item(query: Query):
+     try:
+         embeddings = get_embeddings()
+         vectors = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)
+     except Exception as e:
+         print(f"Error loading vector store: {str(e)}")
+         return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}
+
+     prompt1 = query.query_text
+     if prompt1:
+         start = time.perf_counter()  # wall-clock timer; process_time() would exclude time spent waiting on the API
+         document_chain = create_stuff_documents_chain(llm, prompt)
+         retriever = vectors.as_retriever()
+         retrieval_chain = create_retrieval_chain(retriever, document_chain)
+         response = retrieval_chain.invoke({'input': prompt1})
+         print("Response time:", time.perf_counter() - start)
+
+         # Apply the cleaning function to the response
+         cleaned_response = clean_response(response['answer'])
+
+         # For debugging, print the cleaned response
+         print("Cleaned response:", repr(cleaned_response))
+
+         return cleaned_response
+     else:
+         return "No Query Found"
+
+ @app.get("/setup")
+ def setup():
+     return vector_embedding()
+
+ # Uncomment this to check if the API key is set
+ # print(f"API key set: {'Yes' if os.environ.get('OPENAI_API_KEY') else 'No'}")
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
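As a usage sketch, the two endpoints defined above can be exercised with the requests library once the app is running (locally on port 8000 via `python app.py`, or on port 7860 inside the container). The base URL and the sample question are assumptions for illustration, not part of this commit.

# Client sketch for the /setup and /chat endpoints defined above.
import requests

BASE_URL = "http://localhost:8000"  # adjust to the container or Space URL

# Build (or rebuild) the FAISS vector store from data/Data.docx.
print(requests.get(f"{BASE_URL}/setup", timeout=600).json())

# Ask a question; the endpoint expects a JSON body matching the Query model.
answer = requests.post(
    f"{BASE_URL}/chat",
    json={"query_text": "What programmes does TIET offer?"},
    timeout=120,
)
print(answer.json())  # /chat returns the cleaned answer as a plain JSON string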
data/Data.docx ADDED
Binary file (344 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,150 @@
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.4.3
+ aiohttp==3.11.7
+ aiosignal==1.3.1
+ annotated-types==0.7.0
+ anyio==4.6.2.post1
+ attrs==24.2.0
+ backoff==2.2.1
+ beautifulsoup4==4.12.3
+ cachetools==5.5.0
+ certifi==2024.8.30
+ cffi==1.17.1
+ chardet==5.2.0
+ charset-normalizer==3.4.0
+ click==8.1.7
+ cryptography==43.0.3
+ dataclasses-json==0.6.7
+ distro==1.9.0
+ docstring_parser==0.16
+ emoji==2.14.0
+ eval_type_backport==0.2.0
+ faiss-cpu==1.9.0.post1
+ fastapi==0.115.5
+ filelock==3.16.1
+ filetype==1.2.0
+ frozenlist==1.5.0
+ fsspec==2024.10.0
+ google-ai-generativelanguage==0.6.10
+ google-api-core==2.23.0
+ google-api-python-client==2.154.0
+ google-auth==2.36.0
+ google-auth-httplib2==0.2.0
+ google-cloud-aiplatform==1.73.0
+ google-cloud-bigquery==3.27.0
+ google-cloud-core==2.4.1
+ google-cloud-resource-manager==1.13.1
+ google-cloud-storage==2.18.2
+ google-crc32c==1.6.0
+ google-generativeai==0.8.3
+ google-resumable-media==2.7.2
+ googleapis-common-protos==1.66.0
+ greenlet==3.1.1
+ grpc-google-iam-v1==0.13.1
+ grpcio==1.68.0
+ grpcio-status==1.68.0
+ h11==0.14.0
+ html5lib==1.1
+ httpcore==1.0.7
+ httplib2==0.22.0
+ httpx==0.27.2
+ httpx-sse==0.4.0
+ huggingface-hub==0.26.2
+ idna==3.10
+ Jinja2==3.1.4
+ jiter==0.7.1
+ joblib==1.4.2
+ jsonpatch==1.33
+ jsonpath-python==1.0.6
+ jsonpointer==3.0.0
+ langchain==0.3.8
+ langchain-community==0.3.8
+ langchain-core==0.3.21
+ langchain-google-genai==2.0.5
+ langchain-openai==0.2.9
+ langchain-text-splitters==0.3.2
+ langdetect==1.0.9
+ langsmith==0.1.145
+ lxml==5.3.0
+ MarkupSafe==3.0.2
+ marshmallow==3.23.1
+ mpmath==1.3.0
+ multidict==6.1.0
+ mypy-extensions==1.0.0
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ nltk==3.9.1
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.4.5.8
+ nvidia-cuda-cupti-cu12==12.4.127
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ nvidia-cuda-runtime-cu12==12.4.127
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.2.1.3
+ nvidia-curand-cu12==10.3.5.147
+ nvidia-cusolver-cu12==11.6.1.9
+ nvidia-cusparse-cu12==12.3.1.170
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.4.127
+ olefile==0.47
+ openai==1.55.0
+ orjson==3.10.11
+ packaging==24.2
+ pillow==11.0.0
+ propcache==0.2.0
+ proto-plus==1.25.0
+ protobuf==5.28.3
+ psutil==6.1.0
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.1
+ pycparser==2.22
+ pydantic==2.9.2
+ pydantic-settings==2.6.1
+ pydantic_core==2.23.4
+ PyMuPDF==1.24.14
+ pyparsing==3.2.0
+ pypdf==5.1.0
+ PyPDF2==3.0.1
+ python-dateutil==2.8.2
+ python-docx==1.1.2
+ python-dotenv==1.0.1
+ python-iso639==2024.10.22
+ python-magic==0.4.27
+ python-oxmsg==0.0.1
+ PyYAML==6.0.2
+ RapidFuzz==3.10.1
+ regex==2024.11.6
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ rsa==4.9
+ safetensors==0.4.5
+ scikit-learn==1.5.2
+ scipy==1.14.1
+ sentence-transformers==3.3.1
+ setuptools==75.6.0
+ shapely==2.0.6
+ six==1.16.0
+ sniffio==1.3.1
+ soupsieve==2.6
+ SQLAlchemy==2.0.35
+ starlette==0.41.3
+ sympy==1.13.1
+ tenacity==9.0.0
+ threadpoolctl==3.5.0
+ tiktoken==0.8.0
+ tokenizers==0.20.3
+ torch==2.5.1
+ tqdm==4.67.0
+ transformers==4.46.3
+ triton==3.1.0
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ unstructured==0.16.6
+ unstructured-client==0.28.0
+ uritemplate==4.1.1
+ urllib3==2.2.3
+ uvicorn==0.32.1
+ webencodings==0.5.1
+ wrapt==1.17.0
+ yarl==1.18.0
vectors_db/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccd7b8fc38a26996f66c51ff0558b2cf1bc7a4e13143acec3be23b86c7d06607
+ size 3373101
vectors_db/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2eb5fee5010d291cd821f48562955973db26f26708e5d226ca03c624400ff13b
+ size 537595
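These two LFS pointers hold the FAISS index and its pickled docstore written by vector_embedding() to ./vectors_db. A minimal sketch for checking that the committed store loads, assuming the repository was cloned with git-lfs and using the same BGE model as app.py (the query string is only an example):

# Sanity check: load the committed vectors_db and run one similarity search.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en",
    encode_kwargs={"normalize_embeddings": True},
)
db = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)

print("Vectors in index:", db.index.ntotal)
for doc in db.similarity_search("admission process", k=2):  # illustrative query
    print(doc.page_content[:100])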