pankaj goyal committed on
Commit
be87664
1 Parent(s): 6272fe5

initial commit

Browse files
Files changed (6) hide show
  1. Dockerfile +156 -0
  2. main.py +74 -0
  3. requirements.txt +25 -0
  4. static/styles.css +57 -0
  5. templates/index.html +25 -0
  6. templates/result.html +18 -0
Dockerfile ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /app

# Install the Python dependencies before the application code is copied in.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# HF_HOME is the recommended replacement for the deprecated TRANSFORMERS_CACHE.
ENV HF_HOME=/code/cache/huggingface

# Pre-create the Hugging Face cache directory and make it writable by any user.
RUN mkdir -p "$HF_HOME" \
    && chmod -R 777 "$HF_HOME"

COPY . /app

EXPOSE 7860

# The 120-second Gunicorn timeout keeps workers from being killed during long
# initializations.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app", "--timeout", "120", "--workers", "2", "--threads", "2"]
17
+
18
+
19
+ # # Use an official Python runtime as a base image
20
+ # FROM python:3.9
21
+
22
+ # # Set the working directory in the container
23
+ # WORKDIR /app
24
+
25
+ # # Copy the requirements file into the container at /app
26
+ # COPY ./requirements.txt /code/requirements.txt
27
+
28
+ # # Install any needed packages specified in requirements.txt
29
+ # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
30
+
31
+ # # Define environment variable for the Hugging Face home
32
+ # # ENV HF_HOME=/app/cache/huggingface
33
+ # ENV HF_HOME=/code/cache/huggingface
34
+
35
+
36
+ # # Create the directory for the Transformers cache and set permissions
37
+ # RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
38
+
39
+
40
+ # # # Create the directory for the Hugging Face cache
41
+ # # RUN mkdir -p $HF_HOME
42
+ # # Optional: Adjust permissions if necessary
43
+ # # RUN chmod 755 $HF_HOME
44
+
45
+ # # Copy the rest of your application's code into the container at /app
46
+ # # COPY . /app
47
+ # COPY . .
48
+
49
+ # # Define environment variable for the Flask application port
50
+ # # ENV PORT=8080
51
+
52
+ # # # Expose the port the application runs on
53
+ # # EXPOSE 7860
54
+ # CMD ["panel", "server", "/code/app/py", "--address","0.0.0.0","--port", "7860" "--allow-websocket-origin","pankaj100567-Textsimilarity-str`"]
55
+
56
+ # CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"]
57
+
58
+ # Run app.py when the container launches
59
+ # CMD ["python", "app.py", "--allow-websocket-origin","pankaj100567-Textsimilarity-str.hf.space"]
60
+ # CMD ["python", "app.py", "--allow-websocket-origin","pankaj100567-Textsimilarity-str.hf.space"]
61
+
62
+
63
+
64
+ # # Use an official Python runtime as a base image
65
+ # FROM python:3.9
66
+
67
+ # # Set the working directory in the container
68
+ # WORKDIR /app
69
+
70
+ # # Copy the requirements file into the container at /app
71
+ # COPY requirements.txt /app/requirements.txt
72
+
73
+ # # Install any needed packages specified in requirements.txt
74
+ # RUN pip install --no-cache-dir -r requirements.txt
75
+
76
+ # # Define environment variable for the Transformers cache
77
+ # ENV TRANSFORMERS_CACHE=/app/cache/huggingface
78
+
79
+ # # Create the directory for the Transformers cache
80
+ # RUN mkdir -p /app/cache/huggingface && chmod 777 /app/cache/huggingface
81
+
82
+ # # Copy the rest of your application's code into the container at /app
83
+ # COPY . /app
84
+
85
+ # # Define environment variable for the Flask application port
86
+ # ENV PORT=8080
87
+
88
+ # # Expose the port the application runs on
89
+ # EXPOSE 8080
90
+
91
+ # # Run app.py when the container launches
92
+ # CMD ["python", "app.py"]
93
+
94
+
95
+ # # Use an official Python runtime as a base image
96
+ # FROM python:3.9
97
+
98
+ # # Set the working directory in the container
99
+ # WORKDIR /app
100
+
101
+ # # Copy the requirements file into the container at /app
102
+ # COPY requirements.txt /app/requirements.txt
103
+
104
+ # # Install any needed packages specified in requirements.txt
105
+ # RUN pip install --no-cache-dir -r requirements.txt
106
+
107
+ # # Copy the rest of your application's code into the container at /app
108
+ # COPY . /app
109
+
110
+ # # Define environment variable
111
+ # ENV PORT 8080
112
+
113
+ # # Run app.py when the container launches
114
+ # CMD ["python", "app.py"]
115
+
116
+
117
+ # FROM python:3.9
118
+
119
+
120
+ # WORKDIR /code
121
+
122
+ # COPY ./requirements.txt /code/requirements.txt
123
+
124
+ # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
125
+
126
+ # COPY . .
127
+
128
+ # CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
129
+
130
+ # # Use an official Python runtime as a parent image
131
+ # FROM python:3.9
132
+
133
+ # # Set the working directory in the container
134
+ # WORKDIR /code
135
+
136
+ # # Copy the dependencies file to the working directory
137
+ # COPY requirements.txt /code/
138
+
139
+ # # Install any needed packages specified in requirements.txt
140
+ # RUN pip install --no-cache-dir --upgrade -r requirements.txt
141
+
142
+ # # Copy the current directory contents into the container at /code
143
+ # COPY . /code/
144
+
145
+ # # Make port 5000 available to the world outside this container
146
+ # EXPOSE 5000
147
+
148
+ # # Define environment variable
149
+ # ENV FLASK_APP=app.py
150
+ # ENV FLASK_RUN_HOST=0.0.0.0
151
+ # ENV FLASK_RUN_PORT=5000
152
+
153
+ # # Run the application when the container launches
154
+ # # CMD ["flask", "run"]
155
+
156
+ # CMD ["flask","run","panel","server","--allow-websocker-origin","pankaj100567-similarity-measure.hf.space"]
main.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, jsonify, request, render_template
2
+ import torch
3
+ # from langdetect import detect, DetectorFactory
4
+
5
+ import os
6
+ from transformers import XLMRobertaForSequenceClassification, AutoTokenizer
7
+ from transformers import RobertaForSequenceClassification
8
+
9
+ app = Flask(__name__)
10
+
11
class Predictor:
    """Score a pair of sentences with a sequence-classification model.

    The model's logits are passed through a sigmoid and returned as a single
    Python float.
    """

    def __init__(self, model, tokenizer, device):
        """Store the inference components.

        Args:
            model: Callable accepting ``input_ids`` and ``attention_mask``
                keyword arguments and returning an object with a ``logits``
                attribute.
            tokenizer: Callable that encodes a sentence pair into a mapping
                holding ``input_ids`` and ``attention_mask`` tensors.
            device: Device the input tensors are moved to before inference.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        # Inference only: switch off train-mode behaviour (e.g. dropout) so
        # repeated calls with the same input give the same score. Guarded so
        # plain callables without an ``eval`` method keep working.
        if hasattr(model, "eval"):
            model.eval()

    def predict_similarity(self, sentence1, sentence2):
        """Return the similarity score for ``sentence1`` and ``sentence2``.

        Args:
            sentence1: First sentence of the pair.
            sentence2: Second sentence of the pair.

        Returns:
            The sigmoid of the model's logit as a float, or ``0.0`` if
            tokenization or inference raises. Note that ``0.0`` is therefore
            ambiguous between "unrelated" and "prediction failed"; the failure
            is logged with its traceback.
        """
        import logging  # local import keeps this block self-contained

        try:
            # Tokenize the two sentences together as one paired input.
            encoded_input = self.tokenizer(
                sentence1,
                sentence2,
                return_tensors='pt',
                padding=True,
                truncation=True,
            )
            input_ids = encoded_input['input_ids'].to(self.device)
            attention_mask = encoded_input['attention_mask'].to(self.device)

            # Gradients are not needed for inference.
            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)

            # NOTE(review): the sigmoid assumes a single unbounded logit per
            # pair (``.item()`` requires exactly one element) - confirm this
            # matches how the checkpoint was trained.
            return torch.sigmoid(outputs.logits).item()
        except Exception:
            # Deliberate best-effort boundary: keep the request alive, but
            # record the full traceback instead of only the message.
            logging.getLogger(__name__).exception("Error during model prediction")
            return 0.0
33
+
34
# Load model and tokenizer.
# Checkpoints tried earlier during development:
#   pankaj100567/semantic_textual_relatedness
#   pankaj100567/str_english_model_roberta_large_1stage
#   epoch_1
#   pankaj100567/semantic-english-model
model_path = "pankaj100567/semeval-semantic-texutal-relatedness"

# Follow HF_HOME when it is set so the app and the container image agree on
# the cache location; otherwise fall back to the previously hard-coded path.
cache_dir = os.environ.get("HF_HOME", "/code/cache/huggingface")
try:
    newly_created = not os.path.isdir(cache_dir)
    # exist_ok avoids the check-then-create race when several Gunicorn workers
    # import this module at the same time.
    os.makedirs(cache_dir, exist_ok=True)
    if newly_created:
        # Read, write, and execute for all users. Only applied to a directory
        # this process created; a pre-existing one keeps its permissions.
        os.chmod(cache_dir, 0o777)
except OSError as e:
    print(f"Failed to create or set permissions for directory {cache_dir}: {e}")

# num_labels=1 gives the classification head a single output logit.
model = XLMRobertaForSequenceClassification.from_pretrained(
    model_path, cache_dir=cache_dir, num_labels=1
)
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large", cache_dir=cache_dir)

# Device configuration: use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Single shared Predictor instance used by the request handlers.
predictor = Predictor(model, tokenizer, device)
61
+
62
@app.route('/')
def index():
    """Serve the landing page that holds the sentence-pair input form."""
    landing_page = render_template('index.html')
    return landing_page
65
+
66
@app.route('/predict', methods=['POST'])
def predict():
    """Score the submitted sentence pair and render the result page.

    Reads the ``sentence1`` and ``sentence2`` fields from the posted form,
    passes them to the module-level ``predictor``, and hands both sentences
    plus the score to the ``result.html`` template.
    """
    form = request.form
    first, second = form['sentence1'], form['sentence2']
    score = predictor.predict_similarity(first, second)
    return render_template(
        'result.html',
        sentence1=first,
        sentence2=second,
        similarity_score=score,
    )
72
+
73
+ # if __name__ == '__main__':
74
+ # app.run(debug=True, host='0.0.0.0', port=5002) # Ensure the app is accessible externally
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
gunicorn
transformers
accelerate
einops
xformers
bitsandbytes
huggingface_hub
pypdf
torch
datasets
numpy
scipy
flask
jinja2
pandas
matplotlib
tokenizers
scikit-learn
gradio
nltk
langdetect
beautifulsoup4
Django
static/styles.css ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* styles.css */

body {
    font-family: Arial, sans-serif;
    background-color: #d13030;
    margin: 0;
    padding: 0;
}

.container {
    width: 80%;
    margin: 50px auto;
    background-color: #310c0c;
    /* Light text: the default black is unreadable on this dark background. */
    color: #f5f5f5;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}

h1 {
    text-align: center;
    /* Was #333, which is nearly invisible against the #310c0c container. */
    color: #ffffff;
}

.input-group {
    margin-bottom: 20px;
}

.input-group label {
    display: block;
    margin-bottom: 5px;
    font-weight: bold;
}

.input-group input[type="text"] {
    width: 100%;
    padding: 10px;
    font-size: 16px;
    border: 1px solid #ccc;
    border-radius: 5px;
}

button {
    display: block;
    width: 100%;
    padding: 10px;
    font-size: 16px;
    background-color: #007bff;
    color: #fff;
    border: none;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.3s;
}

button:hover {
    background-color: #0056b3;
}
templates/index.html ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Textual Similarity Predictor</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <div class="container">
        <h1>Textual Similarity Predictor</h1>
        {# url_for keeps the form target correct if the app is served under a URL prefix. #}
        <form id="predictForm" action="{{ url_for('predict') }}" method="post">
            <div class="input-group">
                <label for="sentence1">Enter Sentence 1:</label>
                <input type="text" id="sentence1" name="sentence1" required>
            </div>
            <div class="input-group">
                <label for="sentence2">Enter Sentence 2:</label>
                <input type="text" id="sentence2" name="sentence2" required>
            </div>
            <button type="submit">Predict Similarity</button>
        </form>
    </div>
</body>
</html>
templates/result.html ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Similarity Result</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <div class="container">
        <h1>Textual Similarity Result</h1>
        <p><strong>Sentence 1:</strong> {{ sentence1 }}</p>
        <p><strong>Sentence 2:</strong> {{ sentence2 }}</p>
        <p><strong>Similarity Score:</strong> {{ similarity_score }}</p>
        {# Way back to the input form so the result page is not a dead end. #}
        <form action="{{ url_for('index') }}" method="get">
            <button type="submit">Compare Another Pair</button>
        </form>
    </div>
</body>
</html>