0504ankitsharma committed
upload 9 files
- .gitattributes +1 -0
- Dockerfile +30 -0
- README.md +4 -5
- __pycache__/main.cpython-310.pyc +0 -0
- __pycache__/main.cpython-312.pyc +0 -0
- app.py +144 -0
- data/Data.docx +0 -0
- requirements.txt +150 -0
- vectors_db/index.faiss +3 -0
- vectors_db/index.pkl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+vectors_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,30 @@
# Use the official lightweight Python image as the base
FROM python:3.9-slim

# Keep Python from writing .pyc files and from buffering stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies (optional, adjust as needed)
RUN apt-get update && apt-get install -y \
    gcc \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file into the container
COPY requirements.txt /app/

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . /app/

# Expose the port FastAPI will run on
EXPOSE 7860

# Command to run the FastAPI application
# (the app object lives in app.py at the repo root, so the module path is "app:app")
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
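A minimal smoke test for the container, assuming it was built and started with something like `docker build -t thapargpt .` and `docker run -p 7860:7860 -e OPENAI_API_KEY=... thapargpt` (the image tag and this snippet are illustrative, not part of the commit):

# Hypothetical smoke test: verify the container answers on port 7860.
# Standard library only; the URL matches EXPOSE 7860 in the Dockerfile above.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.load(resp))  # expected: {"Hello": "World"}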
README.md
CHANGED
@@ -1,11 +1,10 @@
 ---
-title: Thapargpt
-emoji:
-colorFrom:
-colorTo:
+title: Thapargpt
+emoji: 🚀
+colorFrom: yellow
+colorTo: indigo
 sdk: docker
 pinned: false
-license: mit
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/main.cpython-310.pyc
ADDED
Binary file (5.05 kB)
__pycache__/main.cpython-312.pyc
ADDED
Binary file (6.89 kB)
app.py
ADDED
@@ -0,0 +1,144 @@
import os
import re
import time

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings


def clean_response(response):
    # Remove leading/trailing whitespace, including newlines
    cleaned = response.strip()

    # Remove any enclosing quotation marks
    cleaned = re.sub(r'^["\']+|["\']+$', '', cleaned)

    # Collapse runs of newlines into a single newline
    cleaned = re.sub(r'\n+', '\n', cleaned)

    # Remove any literal backslash-n sequences left in the text
    cleaned = cleaned.replace('\\n', '')

    return cleaned


app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

openai_api_key = os.environ.get('OPENAI_API_KEY')
llm = ChatOpenAI(
    api_key=openai_api_key,
    model_name="gpt-4-turbo-preview",  # or "gpt-3.5-turbo" for a more economical option
    temperature=0.7
)


@app.get("/")
def read_root():
    return {"Hello": "World"}


class Query(BaseModel):
    query_text: str


prompt = ChatPromptTemplate.from_template(
    """
You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer all queries related to TIET. Every response you provide should be relevant to the context of TIET. If a question falls outside of this context, please decline by stating, 'Sorry, I cannot help with that.' If you do not know the answer to a question, do not attempt to fabricate a response; instead, politely decline.
You may elaborate on your answers slightly to provide more information, but avoid sounding boastful or exaggerating. Stay focused on the context provided.
If the query is not related to TIET or falls outside the context of education, respond with:
"Sorry, I cannot help with that. I'm specifically designed to answer questions about the Thapar Institute of Engineering and Technology.
For more information, please contact us at our toll-free number 18002024100 or e-mail us at admissions@thapar.edu."
<context>
{context}
</context>
Question: {input}
"""
)


def vector_embedding():
    try:
        file_path = "./data/Data.docx"
        if not os.path.exists(file_path):
            print(f"The file {file_path} does not exist.")
            return {"response": "Error: Data file not found"}

        loader = DocxLoader(file_path)
        documents = loader.load()

        print(f"Loaded document: {file_path}")

        # Split the document into overlapping chunks for retrieval
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)

        print(f"Created {len(chunks)} chunks.")

        model_name = "BAAI/bge-base-en"
        encode_kwargs = {'normalize_embeddings': True}
        model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)

        # Build the FAISS index from the chunks and persist it to disk
        db = FAISS.from_documents(chunks, model_norm)
        db.save_local("./vectors_db")

        print("Vector store created and saved successfully.")
        return {"response": "Vector Store DB Is Ready"}

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return {"response": f"Error: {str(e)}"}


def get_embeddings():
    model_name = "BAAI/bge-base-en"
    encode_kwargs = {'normalize_embeddings': True}
    model_norm = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)
    return model_norm


@app.post("/chat")  # Changed from /anthropic to /chat
def read_item(query: Query):
    try:
        embeddings = get_embeddings()
        vectors = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)
    except Exception as e:
        print(f"Error loading vector store: {str(e)}")
        return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}

    prompt1 = query.query_text
    if prompt1:
        start = time.process_time()
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        response = retrieval_chain.invoke({'input': prompt1})
        print("Response time:", time.process_time() - start)

        # Apply the cleaning function to the response
        cleaned_response = clean_response(response['answer'])

        # For debugging, print the cleaned response
        print("Cleaned response:", repr(cleaned_response))

        return cleaned_response
    else:
        return "No Query Found"


@app.get("/setup")
def setup():
    return vector_embedding()


# Uncomment this to check if the API key is set
# print(f"API key set: {'Yes' if os.environ.get('OPENAI_API_KEY') else 'No'}")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
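A hypothetical client for the two endpoints app.py defines, shown as a sketch: `requests` is already pinned in requirements.txt, but the base URL and the sample question are assumptions for illustration.

# Hypothetical client: build the vector store, then ask a question.
import requests

BASE = "http://localhost:7860"  # assumed host/port, matching the Docker setup

# /setup rebuilds the FAISS vector store from data/Data.docx
print(requests.get(f"{BASE}/setup").json())

# /chat expects a body matching the Query model (a query_text field)
r = requests.post(f"{BASE}/chat", json={"query_text": "What programmes does TIET offer?"})
print(r.json())  # /chat returns the cleaned answer as a JSON string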
data/Data.docx
ADDED
Binary file (344 kB)
requirements.txt
ADDED
@@ -0,0 +1,150 @@
aiofiles==24.1.0
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
cachetools==5.5.0
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
click==8.1.7
cryptography==43.0.3
dataclasses-json==0.6.7
distro==1.9.0
docstring_parser==0.16
emoji==2.14.0
eval_type_backport==0.2.0
faiss-cpu==1.9.0.post1
fastapi==0.115.5
filelock==3.16.1
filetype==1.2.0
frozenlist==1.5.0
fsspec==2024.10.0
google-ai-generativelanguage==0.6.10
google-api-core==2.23.0
google-api-python-client==2.154.0
google-auth==2.36.0
google-auth-httplib2==0.2.0
google-cloud-aiplatform==1.73.0
google-cloud-bigquery==3.27.0
google-cloud-core==2.4.1
google-cloud-resource-manager==1.13.1
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-generativeai==0.8.3
google-resumable-media==2.7.2
googleapis-common-protos==1.66.0
greenlet==3.1.1
grpc-google-iam-v1==0.13.1
grpcio==1.68.0
grpcio-status==1.68.0
h11==0.14.0
html5lib==1.1
httpcore==1.0.7
httplib2==0.22.0
httpx==0.27.2
httpx-sse==0.4.0
huggingface-hub==0.26.2
idna==3.10
Jinja2==3.1.4
jiter==0.7.1
joblib==1.4.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
langchain==0.3.8
langchain-community==0.3.8
langchain-core==0.3.21
langchain-google-genai==2.0.5
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langdetect==1.0.9
langsmith==0.1.145
lxml==5.3.0
MarkupSafe==3.0.2
marshmallow==3.23.1
mpmath==1.3.0
multidict==6.1.0
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.4.2
nltk==3.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
olefile==0.47
openai==1.55.0
orjson==3.10.11
packaging==24.2
pillow==11.0.0
propcache==0.2.0
proto-plus==1.25.0
protobuf==5.28.3
psutil==6.1.0
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
pydantic==2.9.2
pydantic-settings==2.6.1
pydantic_core==2.23.4
PyMuPDF==1.24.14
pyparsing==3.2.0
pypdf==5.1.0
PyPDF2==3.0.1
python-dateutil==2.8.2
python-docx==1.1.2
python-dotenv==1.0.1
python-iso639==2024.10.22
python-magic==0.4.27
python-oxmsg==0.0.1
PyYAML==6.0.2
RapidFuzz==3.10.1
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
rsa==4.9
safetensors==0.4.5
scikit-learn==1.5.2
scipy==1.14.1
sentence-transformers==3.3.1
setuptools==75.6.0
shapely==2.0.6
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
SQLAlchemy==2.0.35
starlette==0.41.3
sympy==1.13.1
tenacity==9.0.0
threadpoolctl==3.5.0
tiktoken==0.8.0
tokenizers==0.20.3
torch==2.5.1
tqdm==4.67.0
transformers==4.46.3
triton==3.1.0
typing-inspect==0.9.0
typing_extensions==4.12.2
unstructured==0.16.6
unstructured-client==0.28.0
uritemplate==4.1.1
urllib3==2.2.3
uvicorn==0.32.1
webencodings==0.5.1
wrapt==1.17.0
yarl==1.18.0
vectors_db/index.faiss
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccd7b8fc38a26996f66c51ff0558b2cf1bc7a4e13143acec3be23b86c7d06607
size 3373101
vectors_db/index.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2eb5fee5010d291cd821f48562955973db26f26708e5d226ca03c624400ff13b
size 537595
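The two files above are Git LFS pointers for the prebuilt FAISS store. A minimal sketch for checking the shipped index offline (after `git lfs pull`), assuming the same BGE embeddings that app.py's get_embeddings() configures; the sample query is illustrative.

# Hypothetical offline check: load the committed index and run a similarity search.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en",
    encode_kwargs={"normalize_embeddings": True},
)
db = FAISS.load_local("./vectors_db", embeddings, allow_dangerous_deserialization=True)
for doc in db.similarity_search("admissions", k=2):  # sample query, chosen arbitrarily
    print(doc.page_content[:120])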