Rahatara committed on
Commit
4145b52
1 Parent(s): 9f2b9c7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -139
app.py DELETED
@@ -1,139 +0,0 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
- from typing import List, Tuple
4
- import fitz # PyMuPDF
5
- from sentence_transformers import SentenceTransformer
6
- import numpy as np
7
- import faiss
8
-
9
# Hugging Face Inference API client used for all chat completions below.
# The commented line is an alternative backbone model left for reference.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#client = InferenceClient("meta-llama/Llama-2-7b-chat-hf")
12
# Holds the RAG state: extracted PDF text, document embeddings, and the
# FAISS similarity index shared by the Gradio callbacks below.
class MyApp:
    def __init__(self) -> None:
        self.documents = []    # list of {"page", "content", "file"} dicts
        self.embeddings = None  # ndarray of document embeddings, set by build_vector_db
        self.index = None       # faiss.IndexFlatL2 over self.embeddings
        self._model = None      # lazily-created SentenceTransformer (shared)

    def _get_model(self):
        """Return the shared SentenceTransformer, loading it only once.

        The original code re-instantiated the model in both build_vector_db
        and search_documents, reloading the weights from disk on every call.
        """
        if self._model is None:
            self._model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._model

    def load_pdfs(self, file_paths: List[str]) -> None:
        """Extracts text from multiple PDF files and stores it in the app's documents."""
        self.documents = []
        for file_path in file_paths:
            doc = fitz.open(file_path)
            try:
                for page_num in range(len(doc)):
                    text = doc[page_num].get_text()
                    self.documents.append(
                        {"page": page_num + 1, "content": text, "file": file_path}
                    )
            finally:
                # Release the underlying file handle even if a page fails;
                # the original never closed the document.
                doc.close()
        print("PDFs processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a vector database using the content of the PDFs."""
        model = self._get_model()
        self.embeddings = model.encode(
            [doc["content"] for doc in self.documents], show_progress_bar=True
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(np.array(self.embeddings))
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Searches for the k most similar documents to *query*.

        Returns the fallback message when no index has been built yet;
        the original raised AttributeError on ``self.index.search`` in
        that case.
        """
        if self.index is None:
            return ["No relevant documents found."]
        query_embedding = self._get_model().encode([query], show_progress_bar=False)
        _, neighbor_ids = self.index.search(np.array(query_embedding), k)
        results = [self.documents[i]["content"] for i in neighbor_ids[0]]
        return results if results else ["No relevant documents found."]
45
-
46
# Single module-level instance shared by the Gradio callbacks below.
app = MyApp()
47
-
48
def preprocess_response(response: str) -> str:
    """Preprocesses the response to make it more polished and empathetic."""
    cleaned = response.strip()
    # Tidy up spacing artifacts the model sometimes emits.
    for old, new in (("\n\n", "\n"), (" ,", ","), (" .", ".")):
        cleaned = cleaned.replace(old, new)
    cleaned = " ".join(cleaned.split())
    # Prepend a supportive opener when no empathetic wording is present.
    lowered = cleaned.lower()
    if not any(marker in lowered for marker in ("sorry", "apologize", "empathy")):
        cleaned = "I'm here to help. " + cleaned
    return cleaned
58
-
59
def shorten_response(response: str) -> str:
    """Uses the Zephyr model to shorten and refine the response."""
    system_prompt = "Greet, Shorten and refine this response in a supportive and empathetic manner."
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": response},
    ]
    completion = client.chat_completion(chat, max_tokens=512, temperature=0.5, top_p=0.9)
    return completion.choices[0].message['content'].strip()
64
-
65
def respond(message: str, history: List[Tuple[str, str]], system_message: str):
    """Chat callback: answers *message*, optionally augmented with RAG context.

    Args:
        message: The user's new message.
        history: Prior (user, assistant) turns from the Gradio Chatbot.
        system_message: System prompt from the UI textbox.

    Returns:
        The updated history plus an empty string used to clear the input box.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay prior turns so the model sees the full conversation.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # RAG - retrieve relevant documents if the query suggests exercises or
    # specific information. Skip retrieval until a vector DB has been built:
    # the original called app.search_documents unconditionally and crashed
    # with AttributeError when no PDFs had been uploaded yet.
    keywords = ("exercise", "technique", "information", "guide", "help", "how to")
    if app.index is not None and any(k in message.lower() for k in keywords):
        context = "\n".join(app.search_documents(message))
        if context.strip():
            messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = client.chat_completion(messages, max_tokens=1024, temperature=0.7, top_p=0.9)
    # NOTE(review): membership test on choice.message assumes a dict-like
    # message object from huggingface_hub — confirm against the installed
    # client version.
    response_content = "".join(
        choice.message['content']
        for choice in response.choices
        if 'content' in choice.message
    )

    polished_response = preprocess_response(response_content)
    shortened_response = shorten_response(polished_response)

    history.append((message, shortened_response))
    return history, ""
91
-
92
# Gradio UI. NOTE(review): the source this was recovered from had its
# indentation stripped; the gr.Row() is assumed to hold the file upload and
# message textbox side by side — confirm against the live Space layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘‍♀️ **Dialectical Behaviour Therapy**")
    gr.Markdown(
        "‼️Disclaimer: This chatbot is based on a DBT exercise book that is publicly available. "
        "We are not medical practitioners, and the use of this chatbot is at your own responsibility."
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs (minimum 5)", file_count="multiple", type="file")
        txt_input = gr.Textbox(
            show_label=False,
            placeholder="Type your message here...",
            lines=1
        )
    system_input = gr.Textbox(
        label="System Message",
        placeholder="Enter a system message here...",
        lines=2
    )
    submit_btn = gr.Button("Submit", scale=1)
    refresh_btn = gr.Button("Refresh Chat", scale=1, variant="secondary")

    example_questions = [
        ["What are some ways to cope with stress using DBT?"],
        ["Can you guide me through a grounding exercise?"],
        ["How do I use DBT skills to handle intense emotions?"],
        ["What are some self-soothing techniques I can practice?"]
    ]

    gr.Examples(examples=example_questions, inputs=[txt_input])

    def load_and_build_pdfs(pdfs):
        """Load the uploaded PDFs and (re)build the vector DB.

        Returns nothing: the button below is wired with outputs=[], so any
        return value raises in Gradio. The original returned ([], "msg") in
        the error branch and [] on success — both mismatched the declared
        outputs — and crashed iterating None when no files were uploaded.
        """
        if not pdfs:
            print("Please upload at least 5 PDFs.")
            return
        file_paths = [pdf.name for pdf in pdfs]
        if len(file_paths) < 5:
            print("Please upload at least 5 PDFs.")
            return
        app.load_pdfs(file_paths)
        app.build_vector_db()

    # NOTE(review): both handlers fire on the same button, so every chat
    # submit also re-runs PDF ingestion — presumably intentional, but a
    # dedicated "Build DB" button would avoid redundant work.
    submit_btn.click(fn=load_and_build_pdfs, inputs=[pdf_input], outputs=[])
    submit_btn.click(fn=respond, inputs=[txt_input, chatbot, system_input], outputs=[chatbot, txt_input])
    refresh_btn.click(lambda: [], None, chatbot)
137
-
138
# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()