Junr-syl committed on
Commit
82bc3c8
1 Parent(s): d8eb285

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -0
  2. requirements.txt +5 -0
  3. src/app.py +159 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.9-slim-buster

# Set environment variables for Flask.
# FLASK_APP must point at the application module, which COPY places at
# /app/src/app.py; FLASK_RUN_HOST makes `flask run` listen on all interfaces
# so the server is reachable from outside the container.
ENV FLASK_APP=src/app.py
ENV FLASK_RUN_HOST=0.0.0.0
ENV FLASK_ENV=development

# Set the working directory in the container
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Install any needed packages in requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Make port 7000-8000 available
# NOTE(review): `flask run` listens on port 5000 unless FLASK_RUN_PORT is set;
# confirm which port in this range the deployment expects and set it above.
EXPOSE 7000-8000

# Define the command to run the Flask app.
# `flask run` honours the FLASK_* variables above; the previous command used
# the Windows-only `py` launcher and a path (app/app.py) that does not exist.
CMD ["flask", "run"]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
pandas
numpy
# scipy is imported directly by src/app.py (scipy.spatial.distance.cosine)
scipy
sentence-transformers
elasticsearch
Flask
src/app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os

import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from flask import Flask, request, jsonify
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer
8
+
9
+
10
# Create the Flask application instance for this module.
app = Flask(__name__)

# Configure logging: records at INFO level and above are written to app.log.
logging.basicConfig(level=logging.INFO, filename='app.log')
15
+ """
16
+ Functions for request/response validation
17
+ """
18
def validate_request(request_data):
    """Check that a request payload carries a 'question' entry.

    Args:
        request_data: The parsed JSON body of the request. Anything that is
            not a dict (``None``, a list, a bare string, ...) is rejected.

    Returns:
        bool: True when ``request_data`` is a dict containing the key
        ``'question'``, otherwise False.
    """
    # A non-dict payload must be rejected explicitly: `'question' in None`
    # raises TypeError, and on a string `in` is a substring test rather than
    # a key lookup.
    if not isinstance(request_data, dict):
        return False
    return 'question' in request_data
24
+
25
def validate_response(response_data):
    """Check that a response payload carries a 'message' entry.

    Args:
        response_data: The response body about to be serialised. Anything
            that is not a dict is rejected.

    Returns:
        bool: True when ``response_data`` is a dict containing the key
        ``'message'``, otherwise False.
    """
    # Same guard as for requests: `in` on None raises TypeError and on a
    # string performs a substring match instead of a key lookup.
    if not isinstance(response_data, dict):
        return False
    return 'message' in response_data
31
+
32
+ """
33
+ Function for preparing csv for indexing
34
+ """
35
def prepare_documents(df):
    """Turn a DataFrame of passages into documents ready for indexing.

    Args:
        df: pandas DataFrame with the columns "Passages", "Metadata" and
            "Embedding". Each "Embedding" cell holds a 1-D numeric vector
            (a NumPy array or a plain sequence).

    Returns:
        list[dict]: One document per row, in row order. Each document has
        the keys "Passages", "Metadata" and "Embedding"; the latter is a
        dict with "type", "dims" (the length of that row's vector) and
        "value" (the vector as a plain list).
    """
    documents = []

    for _, row in df.iterrows():
        # np.asarray lets both NumPy arrays and plain lists through, and
        # tolist() yields JSON-serialisable Python numbers.
        vector = np.asarray(row["Embedding"]).tolist()
        documents.append({
            "Passages": row["Passages"],
            "Metadata": row["Metadata"],
            "Embedding": {
                "type": "dense_vector",
                # Report the real dimensionality instead of a fixed 3, which
                # was wrong for any vector of a different length.
                "dims": len(vector),
                "value": vector,
            },
        })
    return documents
50
+ """
51
+ Function for working with retrieval responses
52
+ """
53
def Extraction(response, question_embedding, top_k=3):
    """Rank the hits of a search response by similarity to the question.

    Args:
        response: Search response; ``response["hits"]["hits"]`` is iterated
            and every hit must provide ``_source["Passages"]``,
            ``_source["Metadata"]`` and ``_source["Embedding"]["value"]``.
        question_embedding: 1-D numeric vector for the question, with the
            same length as the stored passage vectors.
        top_k: Maximum number of passages to return. Defaults to 3, the
            previously hard-coded value.

    Returns:
        list[dict]: Up to ``top_k`` dicts with the keys "passage",
        "metadata" and "score", ordered by descending score. The score is
        ``1 - cosine distance`` between question and passage vectors.
    """
    relevant_passages = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        passage_embedding = np.array(source["Embedding"]["value"])
        # Cosine similarity; float() yields a plain Python number so the
        # result serialises the same way regardless of NumPy scalar types.
        score = float(1 - cosine(question_embedding, passage_embedding))
        relevant_passages.append({
            "passage": source["Passages"],
            "metadata": source["Metadata"],
            "score": score,
        })

    # NaN compares false against everything, which would leave the sort
    # order undefined; rank any NaN score below all real scores instead.
    relevant_passages.sort(
        key=lambda item: -np.inf if np.isnan(item["score"]) else item["score"],
        reverse=True,
    )
    return relevant_passages[:max(top_k, 0)]
69
+
70
# Create the Elasticsearch client used by both endpoints.
#
# The endpoint and API key can be supplied through the ELASTICSEARCH_URL and
# ELASTICSEARCH_API_KEY environment variables.
#
# SECURITY NOTE(review): the fallback values below are credentials committed
# to source control and must be treated as compromised. They are kept only so
# existing deployments keep working; rotate the key, provide the new one via
# the environment, and then delete these defaults.
es = Elasticsearch(
    os.environ.get(
        "ELASTICSEARCH_URL",
        "https://92d997736474439dae5ccfaedc2ad990.us-central1.gcp.cloud.es.io:443",
    ),
    api_key=os.environ.get(
        "ELASTICSEARCH_API_KEY",
        "Ym16RzI0b0JIcXpRTU9NQUNUNE46YnBmaUtCWHdTNXlnN1dZR2w4Rllqdw==",
    ),
)
app.logger.info(msg='es instance created')
76
+ """
77
+ Question asking endpoint
78
+
79
+ """
80
# Sentence-embedding model shared by all requests; loaded on first use.
_QUESTION_MODEL = None


def _get_question_model():
    """Return the shared SentenceTransformer, loading it on first use."""
    global _QUESTION_MODEL
    if _QUESTION_MODEL is None:
        _QUESTION_MODEL = SentenceTransformer('sentence-transformers/multi-qa-distilbert-cos-v1')
    return _QUESTION_MODEL


# Define an endpoint for receiving a user question via POST request
@app.route('/ask', methods=['POST'])
def receive_question():
    """Answer a question with the most similar indexed passages.

    Expects a JSON body containing a non-empty string under 'question'.
    The question is embedded, searched for in the
    "search-passagemetadataemb" index (at most 3 hits), and the hits are
    re-ranked with ``Extraction``.

    Returns:
        A JSON response with 'message', 'question' (also duplicated under
        the historical misspelling 'qustion'), and 'results', a dict with
        "Passage i:", "Metadata i:" and "Score i:" entries per ranked hit.
        Responds with HTTP 400 and {'error': 'Invalid request data'} when
        the body is missing, is not a JSON object, or has no usable
        question; HTTP 500 if the assembled response fails validation.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad payload takes the same 400 path below.
    question_data = request.get_json(silent=True)

    # Validate before touching the payload; the isinstance check keeps
    # non-dict bodies (None, lists, strings) from reaching `.get`.
    if not isinstance(question_data, dict) or not validate_request(question_data):
        app.logger.error(msg='Invalid request data')
        return jsonify({'error': 'Invalid request data'}), 400

    user_question = question_data.get('question')
    if not isinstance(user_question, str) or not user_question.strip():
        app.logger.error(msg='Invalid request data')
        return jsonify({'error': 'Invalid request data'}), 400

    # The model is loaded once and reused rather than rebuilt per request.
    question_embedding = _get_question_model().encode(user_question).tolist()

    # Index name created on Elasticsearch
    index_name = "search-passagemetadataemb"
    search_response = es.search(
        index=index_name,
        q=user_question,
        size=3
    )
    top_3 = Extraction(response=search_response, question_embedding=question_embedding)

    results = {}
    for position, passage_info in enumerate(top_3):
        results[f"Passage {position}:"] = passage_info["passage"]
        results[f"Metadata {position}:"] = passage_info["metadata"]
        results[f"Score {position}:"] = passage_info["score"]

    # Respond with a confirmation message
    payload = {'message': 'Question received successfully',
               'question': user_question,
               # Misspelled key kept so existing clients keep working.
               'qustion': user_question,
               'results': results
               }
    # Validate response data
    if not validate_response(payload):
        return jsonify({'error': 'Invalid response data'}), 500
    return jsonify(payload)
123
+
124
+
125
+ """
126
+ File Upload endpoint
127
+ """
128
@app.route('/upload_csv', methods=['POST'])
def upload_document():
    """Store an uploaded CSV and index its rows in Elasticsearch.

    Expects a multipart upload under the form field 'file'. The CSV must
    have the columns "Passages", "Metadata" and "Embedding"; each
    "Embedding" cell is a bracketed, whitespace-separated list of numbers.
    The file is saved under ``uploads/`` and each row is indexed into
    "search-passagemetadataemb" using its row position as document id.

    Returns:
        A JSON confirmation message on success; HTTP 400 with a JSON body
        when no file is supplied, the file name is unusable, or required
        columns are missing.
    """
    # .get avoids the implicit error raised by request.files['file'] when
    # the form field is absent, so both "no field" and "empty field" get
    # the same JSON answer.
    uploaded_file = request.files.get('file')
    if not uploaded_file:
        return jsonify({'message': 'No file uploaded'}), 400

    app.logger.info(msg='file uploaded')

    # The file name comes from the client: keep only its final component so
    # a value such as "../../etc/passwd" cannot escape the uploads folder.
    safe_name = os.path.basename(uploaded_file.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        return jsonify({'error': 'Invalid file name'}), 400

    # The uploads directory is not guaranteed to exist yet.
    os.makedirs('uploads', exist_ok=True)
    file_path = os.path.join('uploads', safe_name)
    uploaded_file.save(file_path)
    df = pd.read_csv(file_path)

    missing = [name for name in ("Passages", "Metadata", "Embedding") if name not in df.columns]
    if missing:
        return jsonify({'error': 'Missing required columns: ' + ', '.join(missing)}), 400

    # Convert the textual embeddings ("[0.1 0.2 ...]", possibly spanning
    # several lines) to NumPy arrays: drop newlines, strip the surrounding
    # brackets, then parse the whitespace-separated numbers.
    df['Embedding'] = df['Embedding'].apply(lambda x: np.fromstring(x.replace('\n', '')[1:-1], sep=' '))

    # Prepare the rows for indexing
    documents = prepare_documents(df)

    index_name = "search-passagemetadataemb"  # index name created on elasticsearch
    # NOTE(review): ids restart at 0 for every upload, so a new file
    # overwrites documents with the same ids -- confirm this is intended.
    for doc_id, document in enumerate(documents):
        es.index(index=index_name, body=document, id=doc_id)

    return jsonify({'message': 'Document uploaded and indexed successfully'})
157
+
158
if __name__ == '__main__':
    # app.run() does not read FLASK_RUN_HOST / FLASK_RUN_PORT itself (only
    # the `flask run` command does), so pass them through explicitly. With
    # nothing set, the defaults match the previous app.run(debug=True):
    # 127.0.0.1:5000 with the debugger on.
    # WARNING: the debugger allows arbitrary code execution; set
    # FLASK_DEBUG=0 whenever the server is reachable by untrusted clients.
    app.run(
        host=os.environ.get('FLASK_RUN_HOST', '127.0.0.1'),
        port=int(os.environ.get('FLASK_RUN_PORT', '5000')),
        debug=os.environ.get('FLASK_DEBUG', '1') != '0',
    )