rakshith committed on
Commit
1950d4b
·
1 Parent(s): da4afb2

added server

Browse files
Files changed (5) hide show
  1. Dockerfile +22 -0
  2. README.md +46 -0
  3. app.py +144 -0
  4. requirements.txt +6 -0
  5. rsvp-server.py +152 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download spaCy model (the same name app.py passes to spacy.load)
RUN python -m spacy download en_core_web_trf

# Copy application code
COPY . .

# Create logs directory (app.py writes logs/app.log)
RUN mkdir -p logs

# Expose port
EXPOSE 5000

# Run the application with uvicorn.
# app:app is a Flask (WSGI) application, so Uvicorn must be told to use its
# WSGI interface; without --interface wsgi it would treat the object as an
# ASGI callable and requests would fail.
CMD ["uvicorn", "app:app", "--interface", "wsgi", "--host", "0.0.0.0", "--port", "5000"]
README.md CHANGED
@@ -9,3 +9,49 @@ license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+ # RSVP Server
14
+
15
+ A Flask API server that provides natural language processing capabilities for RSVP (Rapid Serial Visual Presentation) reading applications.
16
+
17
+ ## Features
18
+
19
+ - POS (Part of Speech) tagging
20
+ - Named Entity Recognition
21
+ - Token grouping for RSVP reading
22
+
23
+ ## Deployment on Hugging Face Spaces
24
+
25
+ This application is designed to be deployed on Hugging Face Spaces:
26
+
27
+ 1. Create a new Space on Hugging Face
28
+ 2. Choose "Docker" as the template
29
+ 3. Upload all files to the repository
30
+ 4. The Dockerfile will handle the setup and dependencies
31
+ 5. The application will be deployed using Uvicorn, a high-performance ASGI server
32
+
33
+ ## API Endpoints
34
+
35
+ ### Health Check
36
+ ```
37
+ GET /health
38
+ ```
39
+
40
+ ### POS Tagging
41
+ ```
42
+ POST /pos-tag
43
+ Content-Type: application/json
44
+
45
+ {
46
+ "text": "Your text to analyze"
47
+ }
48
+ ```
49
+
50
+ ## Local Development
51
+
52
+ Build and run the Docker container:
53
+
54
+ ```bash
55
+ docker build -t rsvp-server .
56
+ docker run -p 5000:5000 rsvp-server
57
+ ```
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ import spacy
4
+ import os
5
+ import logging
6
+ from logging.handlers import RotatingFileHandler
7
+ from werkzeug.middleware.proxy_fix import ProxyFix
8
+
9
# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create the logs directory. exist_ok=True replaces the previous
# "check, then create" sequence, which could raise FileExistsError when
# several worker processes import this module at the same moment.
os.makedirs('logs', exist_ok=True)

# Add file handler for logging: files rotate at 10 KiB, 10 backups are kept
file_handler = RotatingFileHandler('logs/app.log', maxBytes=10240, backupCount=10)
file_handler.setFormatter(logging.Formatter(
    '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
))
logger.addHandler(file_handler)

# Initialize Flask app
app = Flask(__name__)
app.wsgi_app = ProxyFix(app.wsgi_app)  # For proper handling behind proxy servers

# Configure CORS: every route accepts requests from any origin
CORS(app, resources={r"/*": {"origins": "*"}})

# Load environment variables or use defaults
PORT = int(os.environ.get('PORT', 5000))
DEBUG = os.environ.get('DEBUG', 'False').lower() == 'true'
HOST = os.environ.get('HOST', '0.0.0.0')

# Load spaCy model. Only the load itself sits inside the try block; a failure
# is logged with its traceback and re-raised so the process does not start
# without a model.
logger.info("Loading spaCy model...")
try:
    nlp = spacy.load('en_core_web_trf')
except Exception:
    logger.exception("Failed to load spaCy model")
    raise
logger.info("spaCy model loaded successfully")
44
+
45
# Define tag groups for RSVP: each group name maps to the POS tags
# (values of token.pos_) that belong to it.
DELAY_GROUPS = {
    # Intentionally empty: this group is chosen from token.ent_type_ in
    # get_group(), never from a POS tag.
    'named_entity': [],
    'content': ['NOUN', 'PROPN', 'VERB', 'ADJ', 'INTJ'],
    'function': ['DET', 'PRON', 'ADP', 'AUX', 'CCONJ', 'SCONJ'],
    'modifier': ['ADV', 'NUM'],
    'punctuation': ['PUNCT', 'SPACE', 'SYM']
}


def _build_pos_index(groups):
    """Invert a {group: [tags]} mapping into {tag: group}.

    If a tag is listed under several groups, the first group in iteration
    order wins, which matches a front-to-back scan of ``groups``.
    """
    index = {}
    for group, tags in groups.items():
        for tag in tags:
            index.setdefault(tag, group)
    return index


# Built once at import time so get_group() is a single dict lookup instead of
# a scan over every group for every token. It is a snapshot: later changes to
# DELAY_GROUPS are not reflected here.
_POS_TO_GROUP = _build_pos_index(DELAY_GROUPS)


def get_group(token):
    """Return the group name for a given token, considering both NER and POS.

    Args:
        token: Object exposing ``ent_type_`` and ``pos_`` string attributes.

    Returns:
        ``'named_entity'`` when ``token.ent_type_`` is non-empty; otherwise
        the DELAY_GROUPS key whose tag list contains ``token.pos_``; otherwise
        ``'other'``.
    """
    # Entity membership takes priority over the part-of-speech tag.
    if token.ent_type_:
        return 'named_entity'
    return _POS_TO_GROUP.get(token.pos_, 'other')
64
+
65
def combine_with_punctuation(doc):
    """Merge punctuation-group tokens into the chunk that precedes them.

    Walks ``doc`` in order. A token whose ``pos_`` is listed under
    ``DELAY_GROUPS['punctuation']`` is appended, with no separator, to the
    text of the chunk being built and leaves that chunk's group unchanged.
    This applies to every tag in that list, so opening quotes or brackets
    also attach to the chunk before them. Any other token closes the current
    chunk and opens a new one. A punctuation token seen while no chunk is
    open starts a chunk of its own.

    Returns:
        A ``(chunks, groups)`` tuple of parallel lists: the chunk texts and,
        for each, the ``get_group`` result of the token that opened it.
    """
    punctuation_tags = DELAY_GROUPS['punctuation']
    chunks = []
    chunk_groups = []
    pending_text = ""
    pending_group = None

    for token in doc:
        # Punctuation extends an already open chunk and nothing else happens.
        if pending_text and token.pos_ in punctuation_tags:
            pending_text += token.text
            continue

        # Otherwise this token opens a new chunk; emit the open one first.
        if pending_text:
            chunks.append(pending_text)
            chunk_groups.append(pending_group)
        pending_text = token.text
        pending_group = get_group(token)

    # The chunk still open after the last token has not been emitted yet.
    if len(doc) > 0:
        chunks.append(pending_text)
        chunk_groups.append(pending_group)

    return chunks, chunk_groups
91
+
92
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Returns:
        A JSON response with ``status`` fixed to ``'healthy'`` and ``model``
        set to the name of the spaCy pipeline this server loads.
    """
    # Must match the name passed to spacy.load(); this previously reported
    # 'en_core_web_md', which is not the model the server runs.
    return jsonify({'status': 'healthy', 'model': 'en_core_web_trf'})
96
+
97
@app.route('/pos-tag', methods=['POST'])
def pos_tag():
    """Split the posted text into display chunks and label each with a group.

    Expects a JSON object body with a string ``text`` field.

    Returns:
        200 with ``{'tokens': [...], 'groups': [...]}``, the two lists
        produced by ``combine_with_punctuation`` for the parsed text.
        400 with ``{'error': ...}`` when the body is missing, is not valid
        JSON, is not an object, has no ``text`` field, or ``text`` is not a
        non-blank string.
        500 with ``{'error': 'Internal server error'}`` if processing fails.
    """
    # silent=True returns None for a missing or malformed body instead of
    # raising BadRequest; the blanket handler used to turn that client error
    # into a 500.
    data = request.get_json(silent=True)

    if not data:
        logger.warning("No JSON data in request")
        return jsonify({'error': 'No JSON data provided'}), 400

    # A JSON array or scalar is valid JSON but cannot carry a 'text' field.
    if not isinstance(data, dict) or 'text' not in data:
        logger.warning("No 'text' field in request data")
        return jsonify({'error': 'Please provide text in the request body'}), 400

    text = data['text']

    # Reject non-string values here; calling .strip() on them would raise
    # and be reported as a server error.
    if not isinstance(text, str):
        logger.warning("Non-string 'text' field in request data")
        return jsonify({'error': 'Text must be a string'}), 400

    if not text.strip():
        logger.warning("Empty text provided")
        return jsonify({'error': 'Text cannot be empty'}), 400

    try:
        logger.info(f"Processing text of length: {len(text)}")
        doc = nlp(text)

        tokens, groups = combine_with_punctuation(doc)

        response = {
            'tokens': tokens,
            'groups': groups,
        }

        logger.info(f"Successfully processed text with {len(tokens)} tokens")
        return jsonify(response)

    except Exception as e:
        # Top-level boundary: log with traceback, answer with a generic body.
        logger.error(f"Error processing request: {str(e)}", exc_info=True)
        return jsonify({'error': 'Internal server error'}), 500
132
+
133
@app.errorhandler(404)
def not_found_error(error):
    """Return a JSON error body with status 404; ``error`` is not inspected."""
    payload = {'error': 'Not found'}
    return jsonify(payload), 404
136
+
137
@app.errorhandler(500)
def internal_error(error):
    """Log the failure and return a generic JSON error body with status 500."""
    # exc_info=True attaches the exception currently being handled, if any.
    logger.error('Server Error', exc_info=True)
    payload = {'error': 'Internal server error'}
    return jsonify(payload), 500
141
+
142
if __name__ == '__main__':
    # Reached only when this file is executed directly; a server that imports
    # `app` from this module never runs this branch.
    logger.info(f"Starting server on {HOST}:{PORT}")
    run_options = {'host': HOST, 'port': PORT, 'debug': DEBUG}
    app.run(**run_options)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Flask==2.0.1
2
+ flask-cors==3.0.10
3
+ spacy==3.4.0
4
+ werkzeug==2.0.2
5
+ uvicorn==0.15.0
6
+ gunicorn==20.1.0
rsvp-server.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ import spacy
4
+ import os
5
+ import logging
6
+ from logging.handlers import RotatingFileHandler
7
+ from werkzeug.middleware.proxy_fix import ProxyFix
8
+
9
# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create the logs directory. exist_ok=True replaces the previous
# "check, then create" sequence, which could raise FileExistsError when
# several processes start this script at the same moment.
os.makedirs('logs', exist_ok=True)

# Add file handler for logging: files rotate at 10 KiB, 10 backups are kept
file_handler = RotatingFileHandler('logs/app.log', maxBytes=10240, backupCount=10)
file_handler.setFormatter(logging.Formatter(
    '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
))
logger.addHandler(file_handler)

# Initialize Flask app
app = Flask(__name__)
app.wsgi_app = ProxyFix(app.wsgi_app)  # For proper handling behind proxy servers

# Configure CORS: every route accepts requests from any origin
CORS(app, resources={r"/*": {"origins": "*"}})

# Load environment variables or use defaults
PORT = int(os.environ.get('PORT', 5000))
DEBUG = os.environ.get('DEBUG', 'False').lower() == 'true'
HOST = os.environ.get('HOST', '0.0.0.0')

# Load spaCy model. Only the load itself sits inside the try block; a failure
# is logged with its traceback and re-raised so the process does not start
# without a model.
logger.info("Loading spaCy model...")
try:
    nlp = spacy.load('en_core_web_trf')
except Exception:
    logger.exception("Failed to load spaCy model")
    raise
logger.info("spaCy model loaded successfully")
44
+
45
# Define tag groups for RSVP: each group name maps to the POS tags
# (values of token.pos_) that belong to it.
DELAY_GROUPS = {
    # Intentionally empty: this group is chosen from token.ent_type_ in
    # get_group(), never from a POS tag.
    'named_entity': [],
    'content': ['NOUN', 'PROPN', 'VERB', 'ADJ', 'INTJ'],
    'function': ['DET', 'PRON', 'ADP', 'AUX', 'CCONJ', 'SCONJ'],
    'modifier': ['ADV', 'NUM'],
    'punctuation': ['PUNCT', 'SPACE', 'SYM']
}


def _build_pos_index(groups):
    """Invert a {group: [tags]} mapping into {tag: group}.

    If a tag is listed under several groups, the first group in iteration
    order wins, which matches a front-to-back scan of ``groups``.
    """
    index = {}
    for group, tags in groups.items():
        for tag in tags:
            index.setdefault(tag, group)
    return index


# Built once at import time so get_group() is a single dict lookup instead of
# a scan over every group for every token. It is a snapshot: later changes to
# DELAY_GROUPS are not reflected here.
_POS_TO_GROUP = _build_pos_index(DELAY_GROUPS)


def get_group(token):
    """Return the group name for a given token, considering both NER and POS.

    Args:
        token: Object exposing ``ent_type_`` and ``pos_`` string attributes.

    Returns:
        ``'named_entity'`` when ``token.ent_type_`` is non-empty; otherwise
        the DELAY_GROUPS key whose tag list contains ``token.pos_``; otherwise
        ``'other'``.
    """
    # Entity membership takes priority over the part-of-speech tag.
    if token.ent_type_:
        return 'named_entity'
    return _POS_TO_GROUP.get(token.pos_, 'other')
64
+
65
def combine_with_punctuation(doc):
    """Merge punctuation-group tokens into the chunk that precedes them.

    Walks ``doc`` in order. A token whose ``pos_`` is listed under
    ``DELAY_GROUPS['punctuation']`` is appended, with no separator, to the
    text of the chunk being built and leaves that chunk's group unchanged.
    This applies to every tag in that list, so opening quotes or brackets
    also attach to the chunk before them. Any other token closes the current
    chunk and opens a new one. A punctuation token seen while no chunk is
    open starts a chunk of its own.

    Returns:
        A ``(chunks, groups)`` tuple of parallel lists: the chunk texts and,
        for each, the ``get_group`` result of the token that opened it.
    """
    punctuation_tags = DELAY_GROUPS['punctuation']
    chunks = []
    chunk_groups = []
    pending_text = ""
    pending_group = None

    for token in doc:
        # Punctuation extends an already open chunk and nothing else happens.
        if pending_text and token.pos_ in punctuation_tags:
            pending_text += token.text
            continue

        # Otherwise this token opens a new chunk; emit the open one first.
        if pending_text:
            chunks.append(pending_text)
            chunk_groups.append(pending_group)
        pending_text = token.text
        pending_group = get_group(token)

    # The chunk still open after the last token has not been emitted yet.
    if len(doc) > 0:
        chunks.append(pending_text)
        chunk_groups.append(pending_group)

    return chunks, chunk_groups
91
+
92
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Returns:
        A JSON response with ``status`` fixed to ``'healthy'`` and ``model``
        set to the name of the spaCy pipeline this server loads.
    """
    # Must match the name passed to spacy.load(); this previously reported
    # 'en_core_web_md', which is not the model the server runs.
    return jsonify({'status': 'healthy', 'model': 'en_core_web_trf'})
96
+
97
@app.route('/pos-tag', methods=['POST'])
def pos_tag():
    """Split the posted text into display chunks and label each with a group.

    Expects a JSON object body with a string ``text`` field.

    Returns:
        200 with ``{'tokens': [...], 'groups': [...]}``, the two lists
        produced by ``combine_with_punctuation`` for the parsed text.
        400 with ``{'error': ...}`` when the body is missing, is not valid
        JSON, is not an object, has no ``text`` field, or ``text`` is not a
        non-blank string.
        500 with ``{'error': 'Internal server error'}`` if processing fails.
    """
    # silent=True returns None for a missing or malformed body instead of
    # raising BadRequest; the blanket handler used to turn that client error
    # into a 500.
    data = request.get_json(silent=True)

    if not data:
        logger.warning("No JSON data in request")
        return jsonify({'error': 'No JSON data provided'}), 400

    # A JSON array or scalar is valid JSON but cannot carry a 'text' field.
    if not isinstance(data, dict) or 'text' not in data:
        logger.warning("No 'text' field in request data")
        return jsonify({'error': 'Please provide text in the request body'}), 400

    text = data['text']

    # Reject non-string values here; calling .strip() on them would raise
    # and be reported as a server error.
    if not isinstance(text, str):
        logger.warning("Non-string 'text' field in request data")
        return jsonify({'error': 'Text must be a string'}), 400

    if not text.strip():
        logger.warning("Empty text provided")
        return jsonify({'error': 'Text cannot be empty'}), 400

    try:
        logger.info(f"Processing text of length: {len(text)}")
        doc = nlp(text)

        tokens, groups = combine_with_punctuation(doc)

        # NOTE: the per-entity listing below is currently disabled.
        # entities_info = [
        #     {
        #         'text': ent.text,
        #         'label': ent.label_
        #     } for ent in doc.ents
        # ]

        response = {
            'tokens': tokens,
            'groups': groups,
            # 'entities_found': entities_info
        }

        logger.info(f"Successfully processed text with {len(tokens)} tokens")
        return jsonify(response)

    except Exception as e:
        # Top-level boundary: log with traceback, answer with a generic body.
        logger.error(f"Error processing request: {str(e)}", exc_info=True)
        return jsonify({'error': 'Internal server error'}), 500
140
+
141
@app.errorhandler(404)
def not_found_error(error):
    """Return a JSON error body with status 404; ``error`` is not inspected."""
    payload = {'error': 'Not found'}
    return jsonify(payload), 404
144
+
145
@app.errorhandler(500)
def internal_error(error):
    """Log the failure and return a generic JSON error body with status 500."""
    # exc_info=True attaches the exception currently being handled, if any.
    logger.error('Server Error', exc_info=True)
    payload = {'error': 'Internal server error'}
    return jsonify(payload), 500
149
+
150
if __name__ == '__main__':
    # Reached only when this file is executed directly as a script.
    logger.info(f"Starting server on {HOST}:{PORT}")
    run_options = {'host': HOST, 'port': PORT, 'debug': DEBUG}
    app.run(**run_options)