Spaces:
Runtime error
Runtime error
pankaj goyal
committed on
Commit
•
be87664
1
Parent(s):
6272fe5
initial commit
Browse files- Dockerfile +156 -0
- main.py +74 -0
- requirements.txt +25 -0
- static/styles.css +57 -0
- templates/index.html +25 -0
- templates/result.html +18 -0
Dockerfile
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
WORKDIR /app
|
3 |
+
COPY requirements.txt /app/requirements.txt
|
4 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
5 |
+
|
6 |
+
# Use the recommended HF_HOME instead of deprecated TRANSFORMERS_CACHE
|
7 |
+
ENV HF_HOME=/code/cache/huggingface
|
8 |
+
|
9 |
+
# Create the Hugging Face cache directory (HF_HOME) and make it world-writable
|
10 |
+
RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
|
11 |
+
|
12 |
+
COPY . /app
|
13 |
+
|
14 |
+
EXPOSE 7860
|
15 |
+
# Increase Gunicorn timeout to prevent worker timeout during long initializations
|
16 |
+
CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app", "--timeout", "120", "--workers", "2", "--threads", "2"]
|
17 |
+
|
18 |
+
|
19 |
+
# # Use an official Python runtime as a base image
|
20 |
+
# FROM python:3.9
|
21 |
+
|
22 |
+
# # Set the working directory in the container
|
23 |
+
# WORKDIR /app
|
24 |
+
|
25 |
+
# # Copy the requirements file into the container at /app
|
26 |
+
# COPY ./requirements.txt /code/requirements.txt
|
27 |
+
|
28 |
+
# # Install any needed packages specified in requirements.txt
|
29 |
+
# RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
30 |
+
|
31 |
+
# # Define environment variable for the Hugging Face home
|
32 |
+
# # ENV HF_HOME=/app/cache/huggingface
|
33 |
+
# ENV HF_HOME=/code/cache/huggingface
|
34 |
+
|
35 |
+
|
36 |
+
# # Create the directory for the Transformers cache and set permissions
|
37 |
+
# RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
|
38 |
+
|
39 |
+
|
40 |
+
# # # Create the directory for the Hugging Face cache
|
41 |
+
# # RUN mkdir -p $HF_HOME
|
42 |
+
# # Optional: Adjust permissions if necessary
|
43 |
+
# # RUN chmod 755 $HF_HOME
|
44 |
+
|
45 |
+
# # Copy the rest of your application's code into the container at /app
|
46 |
+
# # COPY . /app
|
47 |
+
# COPY . .
|
48 |
+
|
49 |
+
# # Define environment variable for the Flask application port
|
50 |
+
# # ENV PORT=8080
|
51 |
+
|
52 |
+
# # # Expose the port the application runs on
|
53 |
+
# # EXPOSE 7860
|
54 |
+
# CMD ["panel", "server", "/code/app/py", "--address","0.0.0.0","--port", "7860", "--allow-websocket-origin","pankaj100567-Textsimilarity-str"]
|
55 |
+
|
56 |
+
# CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"]
|
57 |
+
|
58 |
+
# Run app.py when the container launches
|
59 |
+
# CMD ["python", "app.py", "--allow-websocket-origin","pankaj100567-Textsimilarity-str.hf.space"]
|
60 |
+
# CMD ["python", "app.py", "--allow-websocket-origin","pankaj100567-Textsimilarity-str.hf.space"]
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
# # Use an official Python runtime as a base image
|
65 |
+
# FROM python:3.9
|
66 |
+
|
67 |
+
# # Set the working directory in the container
|
68 |
+
# WORKDIR /app
|
69 |
+
|
70 |
+
# # Copy the requirements file into the container at /app
|
71 |
+
# COPY requirements.txt /app/requirements.txt
|
72 |
+
|
73 |
+
# # Install any needed packages specified in requirements.txt
|
74 |
+
# RUN pip install --no-cache-dir -r requirements.txt
|
75 |
+
|
76 |
+
# # Define environment variable for the Transformers cache
|
77 |
+
# ENV TRANSFORMERS_CACHE=/app/cache/huggingface
|
78 |
+
|
79 |
+
# # Create the directory for the Transformers cache
|
80 |
+
# RUN mkdir -p /app/cache/huggingface && chmod 777 /app/cache/huggingface
|
81 |
+
|
82 |
+
# # Copy the rest of your application's code into the container at /app
|
83 |
+
# COPY . /app
|
84 |
+
|
85 |
+
# # Define environment variable for the Flask application port
|
86 |
+
# ENV PORT=8080
|
87 |
+
|
88 |
+
# # Expose the port the application runs on
|
89 |
+
# EXPOSE 8080
|
90 |
+
|
91 |
+
# # Run app.py when the container launches
|
92 |
+
# CMD ["python", "app.py"]
|
93 |
+
|
94 |
+
|
95 |
+
# # Use an official Python runtime as a base image
|
96 |
+
# FROM python:3.9
|
97 |
+
|
98 |
+
# # Set the working directory in the container
|
99 |
+
# WORKDIR /app
|
100 |
+
|
101 |
+
# # Copy the requirements file into the container at /app
|
102 |
+
# COPY requirements.txt /app/requirements.txt
|
103 |
+
|
104 |
+
# # Install any needed packages specified in requirements.txt
|
105 |
+
# RUN pip install --no-cache-dir -r requirements.txt
|
106 |
+
|
107 |
+
# # Copy the rest of your application's code into the container at /app
|
108 |
+
# COPY . /app
|
109 |
+
|
110 |
+
# # Define environment variable
|
111 |
+
# ENV PORT 8080
|
112 |
+
|
113 |
+
# # Run app.py when the container launches
|
114 |
+
# CMD ["python", "app.py"]
|
115 |
+
|
116 |
+
|
117 |
+
# FROM python:3.9
|
118 |
+
|
119 |
+
|
120 |
+
# WORKDIR /code
|
121 |
+
|
122 |
+
# COPY ./requirements.txt /code/requirements.txt
|
123 |
+
|
124 |
+
# RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
125 |
+
|
126 |
+
# COPY . .
|
127 |
+
|
128 |
+
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
129 |
+
|
130 |
+
# # Use an official Python runtime as a parent image
|
131 |
+
# FROM python:3.9
|
132 |
+
|
133 |
+
# # Set the working directory in the container
|
134 |
+
# WORKDIR /code
|
135 |
+
|
136 |
+
# # Copy the dependencies file to the working directory
|
137 |
+
# COPY requirements.txt /code/
|
138 |
+
|
139 |
+
# # Install any needed packages specified in requirements.txt
|
140 |
+
# RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
141 |
+
|
142 |
+
# # Copy the current directory contents into the container at /code
|
143 |
+
# COPY . /code/
|
144 |
+
|
145 |
+
# # Make port 5000 available to the world outside this container
|
146 |
+
# EXPOSE 5000
|
147 |
+
|
148 |
+
# # Define environment variable
|
149 |
+
# ENV FLASK_APP=app.py
|
150 |
+
# ENV FLASK_RUN_HOST=0.0.0.0
|
151 |
+
# ENV FLASK_RUN_PORT=5000
|
152 |
+
|
153 |
+
# # Run the application when the container launches
|
154 |
+
# # CMD ["flask", "run"]
|
155 |
+
|
156 |
+
# CMD ["flask","run","panel","server","--allow-websocket-origin","pankaj100567-similarity-measure.hf.space"]
|
main.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, jsonify, request, render_template
|
2 |
+
import torch
|
3 |
+
# from langdetect import detect, DetectorFactory
|
4 |
+
|
5 |
+
import os
|
6 |
+
from transformers import XLMRobertaForSequenceClassification, AutoTokenizer
|
7 |
+
from transformers import RobertaForSequenceClassification
|
8 |
+
|
9 |
+
app = Flask(__name__)
|
10 |
+
|
11 |
+
class Predictor:
    """Score a pair of sentences with a sequence-classification model.

    Bundles the model, its tokenizer and the torch device that the encoded
    inputs are moved to before inference.
    """

    def __init__(self, model, tokenizer, device):
        """Store the collaborators used by ``predict_similarity``.

        Args:
            model: Callable accepting ``input_ids`` and ``attention_mask``
                keyword arguments and returning an object with a ``logits``
                tensor.
            tokenizer: Callable that encodes a sentence pair into a mapping
                containing ``input_ids`` and ``attention_mask`` tensors.
            device: Device the encoded tensors are moved to.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def predict_similarity(self, sentence1, sentence2):
        """Return the sigmoid of the model's logit for a sentence pair.

        Args:
            sentence1: First sentence.
            sentence2: Second sentence.

        Returns:
            The sigmoid-squashed logit as a Python float, or ``0.0`` if
            tokenization or inference raises. The failure is logged with
            its traceback instead of being propagated.
        """
        try:
            # Tokenize input sentences
            encoded_input = self.tokenizer(
                sentence1,
                sentence2,
                return_tensors='pt',
                padding=True,
                truncation=True,
            )
            input_ids = encoded_input['input_ids'].to(self.device)
            attention_mask = encoded_input['attention_mask'].to(self.device)

            # Perform inference without tracking gradients
            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)

            # .item() only works on a one-element tensor, i.e. a single logit.
            return torch.sigmoid(outputs.logits).item()
        except Exception:
            # Best-effort boundary: keep the 0.0 fallback callers rely on, but
            # record the full traceback rather than just the message.
            import logging

            logging.getLogger(__name__).exception("Error during model prediction")
            return 0.0
|
33 |
+
|
34 |
+
# Load model and tokenizer
# model_path = "pankaj100567/semantic_textual_relatedness"
# model_path="pankaj100567/str_english_model_roberta_large_1stage"
# model_path= "epoch_1"
# model_path="pankaj100567/semantic-english-model"
model_path = "pankaj100567/semeval-semantic-texutal-relatedness"
# cache_dir = "/app/cache/huggingface"
cache_dir = "/code/cache/huggingface"
if not os.path.exists(cache_dir):
    try:
        # exist_ok=True: another process importing this module at the same
        # time may create the directory between the check above and this
        # call; that must not be reported as a failure.
        os.makedirs(cache_dir, exist_ok=True)
        os.chmod(cache_dir, 0o777)  # Read, write and execute for all users
    except OSError as e:
        print(f"Failed to create or set permissions for directory {cache_dir}: {e}")

# num_labels=1 gives the classification head a single output logit.
model = XLMRobertaForSequenceClassification.from_pretrained(
    model_path, cache_dir=cache_dir, num_labels=1
)
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large", cache_dir=cache_dir)

# model = XLMRobertaForSequenceClassification.from_pretrained(model_path)
# tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")

# Device configuration: prefer CUDA when available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Initialize Predictor instance
predictor = Predictor(model, tokenizer, device)
|
61 |
+
|
62 |
+
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page
|
65 |
+
|
66 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Score the submitted sentence pair and render ``result.html``.

    Reads the ``sentence1`` and ``sentence2`` form fields from the POST
    request, passes them to the module-level ``predictor`` and hands both
    sentences plus the resulting score to the template.
    """
    form = request.form
    first = form['sentence1']
    second = form['sentence2']
    context = {
        'sentence1': first,
        'sentence2': second,
        'similarity_score': predictor.predict_similarity(first, second),
    }
    return render_template('result.html', **context)
|
72 |
+
|
73 |
+
# if __name__ == '__main__':
|
74 |
+
# app.run(debug=True, host='0.0.0.0', port=5002) # Ensure the app is accessible externally
|
requirements.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gunicorn
|
2 |
+
transformers
|
3 |
+
accelerate
|
4 |
+
einops
|
5 |
+
xformers
|
6 |
+
bitsandbytes
|
7 |
+
huggingface_hub
|
8 |
+
pypdf
|
9 |
+
torch
|
10 |
+
datasets
|
11 |
+
numpy
|
12 |
+
scipy
|
13 |
+
flask
|
14 |
+
gunicorn
|
15 |
+
jinja2
|
16 |
+
pandas
|
17 |
+
matplotlib
|
18 |
+
tokenizers
|
19 |
+
scikit-learn
|
20 |
+
gradio
|
21 |
+
gunicorn
|
22 |
+
nltk
|
23 |
+
langdetect
|
24 |
+
beautifulsoup4
|
25 |
+
Django
|
static/styles.css
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* styles.css */
|
2 |
+
|
3 |
+
body {
|
4 |
+
font-family: Arial, sans-serif;
|
5 |
+
background-color: #d13030;
|
6 |
+
margin: 0;
|
7 |
+
padding: 0;
|
8 |
+
}
|
9 |
+
|
10 |
+
.container {
|
11 |
+
width: 80%;
|
12 |
+
margin: 50px auto;
|
13 |
+
background-color: #310c0c;
|
14 |
+
padding: 20px;
|
15 |
+
border-radius: 10px;
|
16 |
+
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
|
17 |
+
}
|
18 |
+
|
19 |
+
h1 {
|
20 |
+
text-align: center;
|
21 |
+
color: #333;
|
22 |
+
}
|
23 |
+
|
24 |
+
.input-group {
|
25 |
+
margin-bottom: 20px;
|
26 |
+
}
|
27 |
+
|
28 |
+
.input-group label {
|
29 |
+
display: block;
|
30 |
+
margin-bottom: 5px;
|
31 |
+
font-weight: bold;
|
32 |
+
}
|
33 |
+
|
34 |
+
.input-group input[type="text"] {
|
35 |
+
width: 100%;
|
36 |
+
padding: 10px;
|
37 |
+
font-size: 16px;
|
38 |
+
border: 1px solid #ccc;
|
39 |
+
border-radius: 5px;
|
40 |
+
}
|
41 |
+
|
42 |
+
button {
|
43 |
+
display: block;
|
44 |
+
width: 100%;
|
45 |
+
padding: 10px;
|
46 |
+
font-size: 16px;
|
47 |
+
background-color: #007bff;
|
48 |
+
color: #fff;
|
49 |
+
border: none;
|
50 |
+
border-radius: 5px;
|
51 |
+
cursor: pointer;
|
52 |
+
transition: background-color 0.3s;
|
53 |
+
}
|
54 |
+
|
55 |
+
button:hover {
|
56 |
+
background-color: #0056b3;
|
57 |
+
}
|
templates/index.html
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Textual Similarity Predictor</title>
|
7 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
|
8 |
+
</head>
|
9 |
+
<body>
|
10 |
+
<div class="container">
|
11 |
+
<h1>Textual Similarity Predictor</h1>
|
12 |
+
<form id="predictForm" action="/predict" method="post">
|
13 |
+
<div class="input-group">
|
14 |
+
<label for="sentence1">Enter Sentence 1:</label>
|
15 |
+
<input type="text" id="sentence1" name="sentence1" required>
|
16 |
+
</div>
|
17 |
+
<div class="input-group">
|
18 |
+
<label for="sentence2">Enter Sentence 2:</label>
|
19 |
+
<input type="text" id="sentence2" name="sentence2" required>
|
20 |
+
</div>
|
21 |
+
<button type="submit">Predict Similarity</button>
|
22 |
+
</form>
|
23 |
+
</div>
|
24 |
+
</body>
|
25 |
+
</html>
|
templates/result.html
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
<!DOCTYPE html>
|
3 |
+
<html lang="en">
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8">
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
+
<title>Similarity Result</title>
|
8 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
|
9 |
+
</head>
|
10 |
+
<body>
|
11 |
+
<div class="container">
|
12 |
+
<h1>Textual Similarity Result</h1>
|
13 |
+
<p><strong>Sentence 1:</strong> {{ sentence1 }}</p>
|
14 |
+
<p><strong>Sentence 2:</strong> {{ sentence2 }}</p>
|
15 |
+
<p><strong>Similarity Score:</strong> {{ similarity_score }}</p>
|
16 |
+
</div>
|
17 |
+
</body>
|
18 |
+
</html>
|