rohanshaw committed on
Commit
e89a1ec
·
verified ·
1 Parent(s): 1b7200e

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +13 -0
  2. app.py +75 -0
  3. chatbot.py +68 -0
  4. chatbotmemory.py +91 -0
  5. dataset.txt +141 -0
  6. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI chatbot service (served by uvicorn on 7860).

# Pin the interpreter: `latest` silently moves to new Python releases, which
# makes builds non-reproducible and can break the pinned langchain packages.
FROM python:3.11

# Use a dedicated application directory instead of the filesystem root so that
# `COPY . .` does not mix project files with system directories.
WORKDIR /app

# Copy and install the requirements before the rest of the sources so this
# layer stays cached until requirements.txt changes.
COPY ./requirements.txt .

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+
5
+ from chatbot import Chatbot
6
+
7
+ from chatbotmemory import ChatbotMemory
8
+
9
+ import logging
10
+
11
+ from langchain_core.messages import AIMessage, HumanMessage
12
+
13
+
14
app = FastAPI()

# Logging: basicConfig() installs a handler on the root logger; the module
# logger additionally gets its own handler with a timestamped format.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Stop records from also bubbling up to the root handler; otherwise every
# message logged through `logger` is printed twice (once per handler).
logger.propagate = False

# CORS
# NOTE(review): wildcard origins together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation. Behaviour is left unchanged
# here; list explicit origins or disable credentials once the set of
# front-ends calling this API is known.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)

# One shared instance per chatbot flavour: bot1 backs /chat/v1/ and bot2
# backs /chat/v2/.
bot1 = Chatbot()
bot2 = ChatbotMemory()
44
+
45
@app.get("/")
def read_root():
    """Report that the API is up and list the available chat endpoints."""
    available_endpoints = ["/chat/v1/", "/chat/v2/"]
    return {
        "message": "API running successfully",
        "endpoints": available_endpoints,
    }
62
+
63
@app.post("/chat/v1/")
def chat(q: str):
    """Answer a single question with the stateless RAG chain of ``bot1``.

    Args:
        q: The user's question.

    Returns:
        A dict holding the chain output under the ``"answer"`` key.
    """
    logger.info(q)
    return {"answer": bot1.rag_chain.invoke(q)}
68
+
69
# Conversation histories keyed by session id. They live at module level so
# they survive between requests; a list created inside the handler would be
# empty on every call and the chain would never see earlier turns.
_chat_histories = {}

# Cap on stored messages per session so a long conversation cannot grow the
# prompt (and process memory) without bound.
_MAX_HISTORY_MESSAGES = 20


@app.post("/chat/v2/")
def chatMemory(q: str, session_id: str = "default"):
    """Answer a question with the history-aware RAG chain of ``bot2``.

    Args:
        q: The user's question.
        session_id: Identifier of the conversation whose history should be
            used and updated. Callers that omit it share the ``"default"``
            conversation.

    Returns:
        A dict holding the chain output under the ``"answer"`` key.
    """
    chat_history = _chat_histories.setdefault(session_id, [])
    logger.info(q)
    ai_msg = bot2.rag_chain.invoke({"question": q, "chat_history": chat_history})
    # The chain may return plain text; store it as an AIMessage so the history
    # handed back to the prompt only ever contains message objects.
    reply = AIMessage(content=ai_msg) if isinstance(ai_msg, str) else ai_msg
    chat_history.extend([HumanMessage(content=q), reply])
    # Keep only the most recent messages (no-op while under the cap).
    del chat_history[:-_MAX_HISTORY_MESSAGES]
    return {"answer": ai_msg}
chatbot.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import CharacterTextSplitter
2
+ from langchain_community.document_loaders import TextLoader
3
+ from langchain.schema.runnable import RunnablePassthrough
4
+ from langchain.schema.output_parser import StrOutputParser
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
8
+ from dotenv import load_dotenv, find_dotenv
9
+ import os
10
+ from pinecone import Pinecone, PodSpec
11
+
12
+ load_dotenv(find_dotenv())
13
+
14
class Chatbot():
    """Stateless retrieval-augmented chatbot answering questions about Rohan Shaw.

    Everything below runs in the class body, i.e. once when this module is
    imported: the dataset is loaded and split, the Pinecone index is created
    (or reused), and the final chain is exposed as the ``rag_chain`` class
    attribute. Instances carry no state of their own.
    """

    # Load the knowledge base and cut it into chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    index_name = "chatbot"

    # Embed and upload the documents only when the index does not exist yet;
    # otherwise reuse the vectors that are already stored.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,  # presumably the embedding size of the model above -- confirm
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # The prompt is assembled from explicitly separated literals. The previous
    # triple-quoted string relied on backslash line continuations, which glued
    # neighbouring sentences together ("informative.Give", "CONTEXTYou should")
    # and made the text depend on how the source lines were indented.
    template = (
        "\n"
        "INSTRUCTION: Act as PRAN, Personal Assistant of Rohan Shaw, this is conversation "
        "to a user who wants to get his query solved about Rohan Shaw, his professional "
        "and personal life. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "If you don't know any ANSWER, say you don't know. "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to Rohan Shaw. Keep your replies short, "
        "compassionate and informative. "
        "Give the answer from the CONTEXT. "
        "You should help user to get his query solved and also try to increase engagement "
        "and promoting Rohan Shaw.\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    # The input question is used twice: as the retriever query (filling
    # {context}) and, passed through unchanged, as {question}.
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
68
+
chatbotmemory.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain_community.document_loaders import TextLoader
3
+ from langchain.schema.runnable import RunnablePassthrough
4
+ from langchain.schema.output_parser import StrOutputParser
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
7
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
8
+ from dotenv import load_dotenv, find_dotenv
9
+ import os
10
+ from pinecone import Pinecone, PodSpec
11
+
12
+ load_dotenv(find_dotenv())
13
+
14
class ChatbotMemory():
    """History-aware retrieval-augmented chatbot answering questions about Rohan Shaw.

    Everything below runs in the class body, i.e. once when this module is
    imported: the dataset is loaded and split, the Pinecone index is created
    (or reused), and the final chain is exposed as the ``rag_chain`` class
    attribute. The chain expects a dict with the keys ``"question"`` and
    ``"chat_history"``.
    """

    # Load the knowledge base and cut it into overlapping chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    index_name = "chatbot"

    # Embed and upload the documents only when the index does not exist yet;
    # otherwise reuse the vectors that are already stored.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,  # presumably the embedding size of the model above -- confirm
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        """Pick what is sent to the retriever for this turn.

        Args:
            input: Chain input; ``"question"`` is read, and ``"chat_history"``
                is read when present.

        Returns:
            With a non-empty chat history, a chain that rewrites the question
            into a standalone one; otherwise the question string itself.
        """
        if input.get("chat_history"):
            # Names bound in the class body are not visible from inside this
            # function, so it builds its own LLM client.
            llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

            # Explicitly separated literals keep the prompt independent of
            # how these source lines are indented.
            contextualize_q_system_prompt = (
                "Given a chat history and the latest user question "
                "which might reference context in the chat history, formulate a standalone question "
                "which can be understood without the chat history. Do NOT answer the question, "
                "just reformulate it if needed and otherwise return it as is."
            )

            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )

            contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
            return contextualize_q_chain
        else:
            return input["question"]

    # The prompt is assembled from explicitly separated literals. The previous
    # triple-quoted string relied on backslash line continuations, which glued
    # neighbouring sentences together ("informative.Give", "CONTEXTYou should")
    # and made the text depend on how the source lines were indented.
    template = (
        "\n"
        "INSTRUCTION: Act as PRAN, Personal Assistant of Rohan Shaw, this is conversation "
        "to a user who wants to get his query solved about Rohan Shaw, his professional "
        "and personal life. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "If you don't know any ANSWER, say you don't know. "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to Rohan Shaw. Keep your replies short, "
        "compassionate and informative. "
        "Give the answer from the CONTEXT. "
        "You should help user to get his query solved and also try to increase engagement "
        "and promoting Rohan Shaw.\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # Add a "context" key to the input by piping the output of
    # contextualized_question into the retriever, then fill the prompt and
    # call the LLM.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )
dataset.txt ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hey, I am Rohan Shaw. I am an 18 year old boy who is on his way to win the whole world. I dreamed and targeted to be an entrepreneur and start my business, grow it to its fullest, earn respect, fame, power and a lot of money. I completed my 10th from my hometown, 12th from Kota, Rajasthan, also prepared for NEET, and was finally destined to do BCA from Silver Oak University in Ahmedabad, Gujarat.
2
+ I started LumaticAI along with my 3 cofounders and friends. I am the CEO of LumaticAI. In LumaticAI we experienced many ups and downs and now we plan to pivot our business. I am also on my way to start a new venture along with LumaticAI; in the new venture we are planning to start a premium shoes brand whose pricing will be amazing.
3
+
4
+ 🔥 Fuelled by my love for entrepreneurship, technology and coding, I embarked on a remarkable journey from being a Computer Applications graduate at Silver Oak University to co-founding a startup, an AI-driven solutions provider.
5
+
6
+ 👌 I am fluent in Python and JavaScript, and have good working knowledge of C and C++. With my skills rooted in Artificial Intelligence, Machine Learning, and Data Science, I am often drawn to challenging projects that push the envelope. I am proficient in a plethora of tech-related platforms and tools like Flask, Git, FastAPI, MongoDB, and more.
7
+
8
+ πŸ‘¨β€πŸ’» I've had the chance to implement my skills practically in many domains. I have largely contributed to interesting projects such as real-time garbage detection, Full Body Detection, and many similar intriguing ventures! My hands-on experience ranges from front-end development using HTML, CSS, and JS, back-end processing, to model creation.
9
+
10
+ πŸ† My Enthusiasm for technology even handed me the winning title at the SSIP New India Vibrant Hackathon 2023. Beyond the screen, I served as a campus ambassador at Syncloop.
11
+
12
+ 🌱 Currently nearly in the pursuit of newer frontiers in AI and Machine Learning, I can't wait to dive deeper into these exhilarating fields. I believe in continuous learning to stay updated with the latest trends in tech.
13
+
14
+ πŸ‘ Interested in collaborating on AI or Machine Learning projects? Let's connect!
15
+
16
+ website : https://rohanshaw.pages.dev
17
+ {
18
+ "url": "https://rohanshaw.pages.dev/app/home",
19
+ "title": "Home page,
20
+ },
21
+ {
22
+ "url": "https://rohanshaw.pages.dev/app/achievements",
23
+ "title": "Achievements page",
24
+ "data": "Achievements
25
+ APR 2022
26
+ Internship :
27
+ Completed an Internship in the field of Human Resources. Positioned as Recruiter in Biz Bulls India Pvt. Ltd.
28
+
29
+ JAN 2023
30
+ Book Publishing :
31
+ Published My first book \"The Destined Innovator\". It was an out of the world experience.
32
+
33
+ FEB 2023
34
+ Book Publishing :
35
+ Published my second book \"The Corporate language\".
36
+
37
+ JUNE 2023
38
+ Co-Founded LumaticAI :
39
+ I am really happy to be a part of LumaticAI. We a team of 4 co-founded LumaticAI.
40
+
41
+ AUG 2023
42
+ International Book Sale :
43
+ Really a great achievement to do International Sales of my books \"The Destined Innovator\" and \"The Corporate language\".
44
+
45
+ NOV 2023
46
+ Grand Finalist at Hackathon :
47
+ Selected as Grand Finalist for SSIP New India Vibrant Hackathon 2023.
48
+
49
+ JAN 2024
50
+ CEO :
51
+ It was really an honor to be CEO of LumaticAI.
52
+
53
+ JAN 2024
54
+ Campus Ambassador :
55
+ Selected as Campus Ambassador of Syncloop.
56
+
57
+ JAN 2024
58
+ Campus Ambassador :
59
+ Selected as Campus Ambassador of ONDC.
60
+
61
+ FEB 2024 :
62
+ Campus Ambassador of the Month :
63
+ Selected as Campus Ambassador for the January Month of Syncloop.
64
+
65
+ FEB 2024
66
+ GDSC AI-ML Lead :
67
+ Got an opportunity to be an AI-ML Lead of Google Developer Student Club, Silver Oak University.
68
+
69
+ FEB 2024
70
+ Government Project :
71
+ I was selected for a government project for Home Department of Gujarat. The Project was an AI Implementation for smart policing.
72
+
73
+ FEB 2024
74
+ Achiever at Silver Oak University :
75
+ Awarded as an Achiever at Silver Oak University for the year 2023-24.
76
+
77
+ MAR 2024
78
+ Best Core Member :
79
+ Selected as Best Core Member of Google Developer Student Club, Silver Oak University for the February Month."
80
+ },
81
+ {
82
+ "url": "https://rohanshaw.pages.dev/app/connect",
83
+ "title": "Connect Page",
84
+ "data": "Email : rohanshaw.dev@gmail.com
85
+ LinkedIn : https://linkedin.com/in/rohan-shaw-rs
86
+ Github : https://github.com/rohan-shaw
87
+ Instagram : https://instagram.com/rohan_._shaw
88
+ X (formerly Twitter) : https://x.com/heyMeRohan
89
+ Current Address : Gota, Ahmedabad, Gujarat, India
90
+ Hometown : Nalhati, West Bengal, India
91
+ "
92
+ },
93
+ {
94
+ "url": "https://rohanshaw.pages.dev/app/projects",
95
+ "title": "Projects Page",
96
+ "data": "↡ MoodMeter - An AI Web App that takes user review or text as input and predicts intent and sentiment behind the text.
97
+
98
+ Inkognito - A web app using which secret messages can be hidden and retrieved securely.
99
+
100
+ RakshAI - An AI along with a web app that detects weapons and criminals in real-time through a real-time database
101
+
102
+ CodeLens - A code comparison web app with code insights.
103
+
104
+ Bongllama 1.1b chat alpha - A Large Language Model finetuned on tiny llama 1.1b chat with 10k Bengali chats dataset.
105
+
106
+ Bongstral 7b Instruct alpha - An LLM finetuned on mistral 7b with a dataset of 253k Bengali data.
107
+
108
+ ManoVyadh - Introducing ManoVyadh, a finetuned version of TinyLlama 1.1B Chat on Mental Health Counselling Dataset.
109
+
110
+ Duo Studio Website Clone - A cloned version of Duo Studio Website with Modern Web Development.
111
+
112
+ Automated Web Scraper V1 - This is a high level Node.js web scraping API using Puppeteer and Express.js. It allows you to retrieve a list of URLs from a given starting URL and scrape data from each URL in the list. The scraped data includes the page title and cleaned text content. It can bypass almost any blocker.
113
+
114
+ Garbage Detection V1 - A garbage detection and classification model trained using yolov8 and a large garbage dataset.
115
+
116
+ Automated Web Scraper Selenium V1 - We used Selenium to automate web scraping from the internet for a website. It extracts all texts from a website.
117
+
118
+ Time Series Forecasting FB Prophet - This is an AI model for Time Series Forecasting using Facebook Prophet
119
+
120
+ RealTime Full Body Detection Using MediaPipe - This is an AI model for Realtime Full Body Detection using MediaPipe. It has an accuracy that meets standards.
121
+
122
+ Fake News Detection V1 - This is an AI model for Fake News Detection.
123
+
124
+ Presidential Debate Twitter Sentiment Analysis - This is an AI model for Sentiment Analysis for Presidential Debates from Twitter.
125
+
126
+ CSV2Llama - This is an AI model for CSV Chat with Llama2 using Langchain in CPU.
127
+
128
+ Yonos The code is for a social media website called Yonos. It's a Frontend only site of Social Media. The website is only responsive for desktop and not mobile friendly."
129
+ },
130
+ {
131
+ "url": "https://rohanshaw.pages.dev/app/skills",
132
+ "title": "Skills | Rohan Shaw",
133
+ "data": "HTML5 Proficient in building modern, semantic, and accessible HTML5 structures. CSS3 Skilled in writing modular, reusable, and maintainable CSS3 styles. JavaScript Experienced in building dynamic and interactive web applications using JavaScript. React Proficient in building modern, efficient, and scalable user interfaces with React. Node.js Skilled in developing server-side applications and APIs using Node.js. Python Experienced in developing applications and scripting with Python. Flask Proficient in building web applications using the Flask framework for Python. FastAPI Skilled in creating high-performance APIs with FastAPI, a modern Python web framework. C Knowledgeable in the C programming language for system programming and low-level operations. C++ Experienced in developing applications using the C++ programming language. Docker Proficient in containerizing applications using Docker for consistent deployment and scalability. AI Skilled in developing and implementing artificial intelligence solutions. MongoDB Experienced in working with MongoDB, a popular NoSQL database. SQL Knowledgeable in relational databases and writing SQL queries. Git Proficient in version control and collaborating on projects using Git. Pandas Skilled in data manipulation and analysis using the Pandas library for Python. NumPy Experienced in working with numerical data using the NumPy library for Python. Seaborn Proficient in creating informative and visually appealing statistical graphics using Seaborn. Matplotlib Skilled in creating static, animated, and interactive visualizations with Matplotlib. PyTorch Experienced in developing and training deep learning models using PyTorch. TensorFlow Proficient in building and deploying machine learning models with TensorFlow. OpenCV Skilled in computer vision tasks using the OpenCV library for image and video processing. Azure Experienced in deploying and managing applications on the Microsoft Azure cloud platform. 
AWS Proficient in leveraging various services offered by Amazon Web Services (AWS). GCP Skilled in utilizing the Google Cloud Platform (GCP) for cloud computing services. Marketing Knowledgeable in marketing strategies and campaigns. Sales Experienced in sales processes and techniques. Operations Skilled in managing and optimizing operational processes. Leadership Proficient in leading teams and driving projects to success. Communication Effective in communicating complex ideas and collaborating with stakeholders."
134
+ },
135
+ {
136
+ "url": "https://rohanshaw.pages.dev/app/startup",
137
+ "title": "Startups | Rohan Shaw",
138
+ "data": "LumaticAI - Provides AI Consultancy, AI Development And AI Implementation Services. - Status : Pivoting - website -> lumaticai.com July, 2023
139
+
140
+ Name Yet Not Decided - Premium Shoes In Best Prices. - Status : In Progress April, 2024"
141
+ }
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.6
2
+ langchain-community==0.0.19
3
+ langchain-core==0.1.23
4
+ pinecone-client
5
+ python-dotenv
6
+ fastapi
7
+ langchain_google_genai
8
+ langchain-pinecone
9
+ chardet
10
+ uvicorn