Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
import uuid
|
4 |
-
import
|
5 |
from datetime import datetime
|
6 |
from flask import Flask, request, Response, jsonify
|
7 |
import socketio
|
@@ -9,6 +9,9 @@ import requests
|
|
9 |
import logging
|
10 |
from threading import Event
|
11 |
|
|
|
|
|
|
|
12 |
app = Flask(__name__)
|
13 |
logging.basicConfig(level=logging.INFO)
|
14 |
|
@@ -77,10 +80,9 @@ def normalize_content(content):
|
|
77 |
|
78 |
def calculate_tokens(text):
|
79 |
"""
|
80 |
-
|
81 |
"""
|
82 |
-
|
83 |
-
tokens = re.findall(r"\w+|[^\w\s]", text)
|
84 |
return len(tokens)
|
85 |
|
86 |
@app.route('/')
|
|
|
1 |
import os
|
2 |
import json
|
3 |
import uuid
|
4 |
+
import nltk
|
5 |
from datetime import datetime
|
6 |
from flask import Flask, request, Response, jsonify
|
7 |
import socketio
|
|
|
9 |
import logging
|
10 |
from threading import Event
|
11 |
|
12 |
# Download NLTK's "punkt" model, used by word_tokenize for tokenization.
# quiet=True: nltk.download is a no-op re-check once the model is present,
# so suppress its console output on every process start.
nltk.download('punkt', quiet=True)

app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
|
17 |
|
|
|
80 |
|
81 |
def calculate_tokens(text):
    """Return the number of tokens in *text*.

    Tokenization is delegated to NLTK's ``word_tokenize``, which relies on
    the ``punkt`` model downloaded at module import time.
    """
    return len(nltk.word_tokenize(text))
|
87 |
|
88 |
@app.route('/')
|