Spaces:
Running
Running
Artem Nikolaev committed on
Commit ·
ca8ebf7
1
Parent(s): 3f3962f
PDF Task Parser with FastAPI and Google integration
Browse files- Dockerfile +14 -0
- backend.py +257 -0
- requirements.txt +17 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-314.pyc +0 -0
- src/__pycache__/excel_exporter.cpython-314.pyc +0 -0
- src/__pycache__/google_calendar.cpython-314.pyc +0 -0
- src/__pycache__/google_sheets.cpython-314.pyc +0 -0
- src/__pycache__/main_with_calendar.cpython-314.pyc +0 -0
- src/__pycache__/parser.cpython-314.pyc +0 -0
- src/__pycache__/summarizer.cpython-314.pyc +0 -0
- src/excel_exporter.py +105 -0
- src/google_calendar.py +190 -0
- src/google_sheets.py +127 -0
- src/parser.py +653 -0
- src/summarizer.py +122 -0
- web/.DS_Store +0 -0
- web/icons/.DS_Store +0 -0
- web/icons/calendarLogo.svg +16 -0
- web/icons/favicon.svg +7 -0
- web/icons/sheetsLogo-svgrepo-com.svg +7 -0
- web/icons/titleIcon.svg +37 -0
- web/icons/upload.svg +27 -0
- web/index.html +92 -0
- web/script.js +230 -0
- web/style.css +253 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the PDF Task Parser API (FastAPI served by uvicorn).
FROM python:3.11-slim

# The application reports progress exclusively through print(); without this
# Python block-buffers stdout when it is not a TTY and the container logs lag
# behind (or are lost on a crash). Skipping .pyc generation keeps the image
# layer free of bytecode caches.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Install dependencies first so this layer is reused while only code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Working directories referenced by backend.py / src/excel_exporter.py.
RUN mkdir -p temp_uploads output

EXPOSE 7860

CMD ["uvicorn", "backend:app", "--host", "0.0.0.0", "--port", "7860"]
|
backend.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File, Request
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import JSONResponse, HTMLResponse
|
| 4 |
+
import tempfile
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
import base64
|
| 8 |
+
import uvicorn
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from io import BytesIO
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
+
|
| 14 |
+
from src.parser import TaskParser
|
| 15 |
+
from src.excel_exporter import ExcelExporter
|
| 16 |
+
from src.summarizer import TaskSummarizer
|
| 17 |
+
from src.google_sheets import GoogleSheetsExporter
|
| 18 |
+
from src.google_calendar import GoogleCalendarExporter
|
| 19 |
+
|
| 20 |
+
import json
|
| 21 |
+
|
| 22 |
+
# Service-account key for the Google exporters. When the GOOGLE_CREDENTIALS
# environment variable is set, its content is materialised at the path the
# exporters read by default (credentials/google-credentials.json).
GOOGLE_CREDS = os.environ.get("GOOGLE_CREDENTIALS")
if GOOGLE_CREDS:
    try:
        # Sanity check only: a malformed secret is still written (as before),
        # but the problem is reported at start-up instead of at first export.
        json.loads(GOOGLE_CREDS)
    except ValueError:
        print("⚠️ GOOGLE_CREDENTIALS не является корректным JSON")
    os.makedirs("credentials", exist_ok=True)
    with open("credentials/google-credentials.json", "w", encoding="utf-8") as f:
        f.write(GOOGLE_CREDS)
    # The file holds a private key: restrict it to the owning user.
    os.chmod("credentials/google-credentials.json", 0o600)
    print("✅ Google-ключ загружен из секретов")

app = FastAPI(title="PDF Task Parser API")

# CORS is fully open (any origin, method and header).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

TEMP_DIR = "temp_uploads"
os.makedirs(TEMP_DIR, exist_ok=True)

# Lazily created by get_summarizer(): None = not tried yet,
# False = loading failed, otherwise the TaskSummarizer instance.
summarizer = None
|
| 42 |
+
|
| 43 |
+
def get_summarizer():
    """Return the shared TaskSummarizer, constructing it on first use.

    The module-level ``summarizer`` variable caches the outcome: ``None``
    means no attempt was made yet, ``False`` records a failed attempt (so the
    construction is not retried on every call) and anything else is the
    ready instance.

    Returns:
        The TaskSummarizer instance, or None when it could not be created.
    """
    global summarizer

    # Fast path: a previous call already settled the outcome.
    if summarizer is not None:
        return None if summarizer is False else summarizer

    try:
        print("🔄 Загрузка суммаризатора...")
        instance = TaskSummarizer()
        print("✅ Суммаризатор загружен")
    except Exception as exc:
        print(f"⚠️ Суммаризатор не загружен: {exc}")
        summarizer = False
        return None

    summarizer = instance
    return instance
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@app.get("/")
async def root():
    """Return a small JSON payload identifying the API and its status."""
    payload = {"message": "PDF Task Parser API", "status": "running"}
    return payload
|
| 59 |
+
|
| 60 |
+
@app.get("/app")
async def get_app():
    """Serve web/index.html, or a 404 page when the file does not exist."""
    html_path = os.path.join("web", "index.html")

    # Guard clause: answer with the fallback page when the file is absent.
    if not os.path.exists(html_path):
        return HTMLResponse(content="<h1>index.html not found</h1>", status_code=404)

    with open(html_path, "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(content=markup)
|
| 67 |
+
|
| 68 |
+
@app.get("/style.css")
async def get_css():
    """Serve web/style.css as ``text/css``.

    Returns an empty 404 response when the stylesheet is missing instead of
    letting the failed open() surface as an unhandled server error, matching
    how the other static handlers in this module report missing files.
    """
    css_path = os.path.join("web", "style.css")
    if not os.path.isfile(css_path):
        return HTMLResponse(status_code=404)
    with open(css_path, "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read(), media_type="text/css")
|
| 72 |
+
|
| 73 |
+
@app.get("/script.js")
async def get_js():
    """Serve web/script.js as ``application/javascript``.

    Returns an empty 404 response when the script is missing instead of
    letting the failed open() surface as an unhandled server error, matching
    how the other static handlers in this module report missing files.
    """
    js_path = os.path.join("web", "script.js")
    if not os.path.isfile(js_path):
        return HTMLResponse(status_code=404)
    with open(js_path, "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read(), media_type="application/javascript")
|
| 77 |
+
|
| 78 |
+
@app.get("/web/icons/{icon_name}")
async def get_icon(icon_name: str):
    """Serve a single file from web/icons.

    Args:
        icon_name: file name taken from the URL path.

    Returns:
        The file content with a media type guessed from its extension
        (``image/svg+xml`` when unknown, which is what every icon was served
        as before), or an empty 404 response when the name is not a plain
        file name or no such file exists.
    """
    import mimetypes  # local import: only this handler needs it

    # The name comes straight from the request: accept plain file names only
    # so it can never address anything outside web/icons.
    is_plain_name = (
        icon_name not in ("", ".", "..")
        and "\\" not in icon_name
        and os.path.basename(icon_name) == icon_name
    )
    if not is_plain_name:
        return HTMLResponse(status_code=404)

    icon_path = os.path.join("web", "icons", icon_name)
    # isfile (not exists): a directory would pass exists() and then make
    # open() raise.
    if not os.path.isfile(icon_path):
        return HTMLResponse(status_code=404)

    media_type = mimetypes.guess_type(icon_path)[0] or "image/svg+xml"
    with open(icon_path, "rb") as f:
        return HTMLResponse(content=f.read(), media_type=media_type)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _unique_sheet_name(filename, used_names):
    """Derive a worksheet title from an uploaded file name.

    The extension is dropped, the stem is cut to 30 characters, and spaces
    plus the characters Excel rejects in sheet titles are replaced with "_".
    When the result was already handed out, a numeric suffix is appended so
    that a later file cannot overwrite an earlier file's sheet.

    Args:
        filename: original name of the uploaded file.
        used_names: set of titles already taken; the returned title is added.

    Returns:
        A title of at most 30 characters that is not in ``used_names``.
    """
    stem = os.path.splitext(filename)[0][:30]
    base = "".join("_" if ch in " /\\[]:*?" else ch for ch in stem) or "Sheet"
    candidate = base
    counter = 2
    while candidate in used_names:
        suffix = f"_{counter}"
        candidate = base[:30 - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate)
    return candidate


@app.post("/parse-batch")
async def parse_batch(request: Request):
    """Parse uploaded documents into tasks and export the result.

    Multipart form fields read from the request:
        files: one or more uploaded documents.
        export_to_sheets / export_to_calendar: "true" to enable the export.
        sheets_url: spreadsheet to write to (used when export_to_sheets).
        calendar_id: calendar to write to (used when export_to_calendar).

    Returns:
        On success a dict with ``success``, ``tasks``, ``statistics``,
        ``excel_base64`` (the generated workbook), ``files``,
        ``sheets_export``, ``calendar_export`` and ``file_errors``.
        When no task was found in any file, a JSON response with
        ``success: False`` and an ``error`` message.

    A failure in one file or in one export is recorded and reported in the
    response; it does not abort the rest of the request.
    """
    print("\n" + "="*60)
    print("🔍 ПОЛУЧЕН ЗАПРОС НА ПАРСИНГ")
    print("="*60)

    form = await request.form()

    files = form.getlist("files")
    export_to_sheets = str(form.get("export_to_sheets", "false")).lower() == "true"
    export_to_calendar = str(form.get("export_to_calendar", "false")).lower() == "true"
    sheets_url = form.get("sheets_url", "")
    calendar_id = form.get("calendar_id", "")

    print(f"📄 Файлов: {len(files)}")
    print(f"📊 Экспорт в Sheets: {export_to_sheets}")
    print(f"📅 Экспорт в Calendar: {export_to_calendar}")
    print(f"🔗 URL Sheets: {sheets_url}")
    print(f"📆 ID Calendar: {calendar_id}")
    print("="*60 + "\n")

    all_results = []
    all_tasks_data = []
    all_dfs = []
    file_errors = []
    sheets_export_status = None
    calendar_export_status = None
    # Reserve the title of the combined sheet added at the end.
    used_sheet_names = {"Все задачи"}

    for file in files:
        # A plain text value sent under "files" has no filename: skip it.
        filename = getattr(file, "filename", None)
        if not filename:
            continue

        file_ext = os.path.splitext(filename)[1].lower()
        tmp_path = None
        try:
            # The parser works on a path, so the upload is spooled to disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
                tmp_path = tmp.name
                tmp.write(await file.read())

            parser = TaskParser(tmp_path)
            text = parser.extract_text()
            tasks = parser.parse_tasks(text)

            if tasks:
                task_summarizer = get_summarizer()
                for task in tasks:
                    task['source'] = filename
                    if task_summarizer:
                        try:
                            task['summary'] = task_summarizer.summarize(task['full_description'])
                        except Exception:
                            # Best effort: fall back to a truncated description.
                            task['summary'] = task['full_description'][:100] + "..."
                    else:
                        task['summary'] = task['full_description'][:100] + "..."
                    all_tasks_data.append(task)

                df = pd.DataFrame([
                    {
                        '№': task['number'],
                        'Краткое описание': task.get('summary', ''),
                        'Описание': task['full_description'],
                        'Ответственный': task.get('responsible', ''),
                        'Срок': task.get('due_date_str', ''),
                    }
                    for task in tasks
                ])
                all_dfs.append({
                    "df": df,
                    "filename": filename,
                    "sheet_name": _unique_sheet_name(filename, used_sheet_names),
                    "tasks": tasks,
                })

            all_results.append({
                "filename": filename,
                "tasks": tasks,
                "count": len(tasks),
            })

        except Exception as e:
            print(f"Ошибка в {filename}: {e}")
            file_errors.append({"name": filename, "error": str(e)})
        finally:
            # Always remove the spooled copy, whatever happened above.
            if tmp_path and os.path.exists(tmp_path):
                os.remove(tmp_path)

    if not all_tasks_data:
        return JSONResponse({
            "success": False,
            "error": "Задачи не найдены ни в одном файле",
            "file_errors": file_errors,
        })

    # ===== EXPORT TO GOOGLE SHEETS =====
    if export_to_sheets and sheets_url:
        try:
            print("📊 Экспорт в Google Sheets...")
            sheets_exporter = GoogleSheetsExporter()
            if sheets_exporter.use_existing_spreadsheet(sheets_url):
                failed_sheets = []
                for item in all_dfs:
                    # export_dataframe signals failure by returning False.
                    if sheets_exporter.export_dataframe(item['df'], item['sheet_name']) is False:
                        failed_sheets.append(item['sheet_name'])
                if failed_sheets:
                    sheets_export_status = (
                        "error: не удалось записать листы: " + ", ".join(failed_sheets)
                    )
                    print(f"❌ Ошибка Sheets: {sheets_export_status}")
                else:
                    sheets_export_status = "success"
                    print("✅ Экспорт в Google Sheets выполнен")
            else:
                sheets_export_status = "error: таблица не найдена"
                print("❌ Таблица не найдена")
        except (Exception, SystemExit) as e:
            # SystemExit: the exporter calls sys.exit() when the credentials
            # are missing or rejected; that must not take the server down.
            reason = "не удалось подключиться к Google" if isinstance(e, SystemExit) else str(e)
            sheets_export_status = f"error: {reason}"
            print(f"❌ Ошибка Sheets: {reason}")

    # ===== EXPORT TO GOOGLE CALENDAR =====
    if export_to_calendar and calendar_id:
        try:
            print("📅 Экспорт в Google Calendar...")
            print(f"   ID календаря: {calendar_id}")
            print(f"   Количество задач: {len(all_tasks_data)}")

            calendar_exporter = GoogleCalendarExporter(calendar_id=calendar_id)
            calendar_exporter.create_events_from_tasks(all_tasks_data)
            calendar_export_status = "success"
            print("✅ Экспорт в Google Calendar выполнен")
        except (Exception, SystemExit) as e:
            # See the Sheets branch for why SystemExit is caught here.
            reason = "не удалось подключиться к Google" if isinstance(e, SystemExit) else str(e)
            calendar_export_status = f"error: {reason}"
            print(f"❌ Ошибка Calendar: {reason}")
    else:
        print(f"⚠️ Экспорт в Calendar пропущен: export_to_calendar={export_to_calendar}, calendar_id={calendar_id}")

    # ===== BUILD THE EXCEL WORKBOOK =====
    exporter = ExcelExporter()

    # One sheet per source file ...
    for item in all_dfs:
        exporter.add_sheet(item['df'], item['sheet_name'])

    # ... plus one combined sheet that also names the source file.
    all_df = pd.DataFrame([
        {
            'Источник': task.get('source', ''),
            '№': task['number'],
            'Краткое описание': task.get('summary', ''),
            'Описание': task['full_description'],
            'Ответственный': task.get('responsible', ''),
            'Срок': task.get('due_date_str', ''),
        }
        for task in all_tasks_data
    ])
    exporter.add_sheet(all_df, "Все задачи")

    excel_buffer = BytesIO()
    exporter.save_to_buffer(excel_buffer)
    excel_base64 = base64.b64encode(excel_buffer.getvalue()).decode('ascii')

    total_stats = {
        "total": len(all_tasks_data),
        "with_responsible": sum(1 for t in all_tasks_data if t.get('responsible')),
        "with_date": sum(1 for t in all_tasks_data if t.get('due_date_str')),
        "files_count": len(all_results),
    }

    return {
        "success": True,
        "tasks": all_tasks_data,
        "statistics": total_stats,
        "excel_base64": excel_base64,
        "files": [{"name": r["filename"], "count": r["count"]} for r in all_results],
        "sheets_export": sheets_export_status,
        "calendar_export": calendar_export_status,
        "file_errors": file_errors,
    }
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
if __name__ == "__main__":
    # Direct launch (python backend.py). The port can be overridden through
    # the PORT environment variable; it defaults to 8000 as before.
    print("🚀 Запуск PDF Task Parser API")
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8000")))
|
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.135.1
|
| 2 |
+
uvicorn==0.42.0
|
| 3 |
+
python-multipart==0.0.22
|
| 4 |
+
pandas==3.0.1
|
| 5 |
+
openpyxl==3.1.5
|
| 6 |
+
pdfplumber==0.11.9
|
| 7 |
+
python-docx==1.2.0
|
| 8 |
+
google-auth==2.48.0
|
| 9 |
+
google-api-python-client==2.190.0
|
| 10 |
+
gspread==6.2.1
|
| 11 |
+
transformers==5.2.0
|
| 12 |
+
torch==2.10.0
|
| 13 |
+
sentencepiece==0.2.1
|
| 14 |
+
tiktoken==0.12.0
|
| 15 |
+
python-dateutil==2.9.0.post0
|
| 16 |
+
certifi==2026.2.25
|
| 17 |
+
requests==2.32.5
|
src/__init__.py
ADDED
|
File without changes
|
src/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (143 Bytes). View file
|
|
|
src/__pycache__/excel_exporter.cpython-314.pyc
ADDED
|
Binary file (6.23 kB). View file
|
|
|
src/__pycache__/google_calendar.cpython-314.pyc
ADDED
|
Binary file (9.01 kB). View file
|
|
|
src/__pycache__/google_sheets.cpython-314.pyc
ADDED
|
Binary file (6.5 kB). View file
|
|
|
src/__pycache__/main_with_calendar.cpython-314.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|
src/__pycache__/parser.cpython-314.pyc
ADDED
|
Binary file (32.6 kB). View file
|
|
|
src/__pycache__/summarizer.cpython-314.pyc
ADDED
|
Binary file (6.36 kB). View file
|
|
|
src/excel_exporter.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from openpyxl import Workbook
|
| 3 |
+
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import os
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
|
| 8 |
+
class ExcelExporter:
    """Build a formatted multi-sheet .xlsx workbook from pandas DataFrames."""

    def __init__(self, filename=None):
        """Open ``filename`` when it exists, otherwise start an empty workbook.

        Args:
            filename: optional path of an existing workbook to extend.
        """
        self.filename = filename
        self.base_filename = "output/tasks.xlsx"

        if filename and os.path.exists(filename):
            from openpyxl import load_workbook
            self.wb = load_workbook(filename)
        else:
            self.wb = Workbook()
            # A fresh Workbook comes with a default "Sheet": drop it so only
            # explicitly added sheets end up in the file.
            if "Sheet" in self.wb.sheetnames:
                self.wb.remove(self.wb["Sheet"])

    def add_sheet(self, df, sheet_name: str):
        """Write ``df`` to a sheet named ``sheet_name``, replacing any existing one.

        Row 1 receives the column names, the data rows follow in DataFrame
        order. Date-like values get the ``DD.MM.YYYY`` number format.

        Args:
            df: DataFrame to write.
            sheet_name: title of the worksheet.
        """
        if sheet_name in self.wb.sheetnames:
            self.wb.remove(self.wb[sheet_name])

        ws = self.wb.create_sheet(title=sheet_name)

        for col_idx, header in enumerate(df.columns, 1):
            ws.cell(row=1, column=col_idx, value=header)

        # Rows are placed by position, not by index label: the DataFrame index
        # may be non-contiguous or non-numeric (e.g. after filtering).
        for row_idx, row in enumerate(df.itertuples(index=False, name=None), 2):
            for col_idx, value in enumerate(row, 1):
                cell = ws.cell(row=row_idx, column=col_idx, value=value)
                if isinstance(value, (datetime, pd.Timestamp)):
                    cell.number_format = 'DD.MM.YYYY'

        self._apply_formatting(ws, len(df.columns), len(df))

    def _apply_formatting(self, ws, num_columns, num_rows):
        """Style the header row, add borders/alignment and size the columns.

        Args:
            ws: worksheet to format.
            num_columns: number of data columns (header cells in row 1).
            num_rows: number of data rows below the header.
        """
        header_font = Font(name='Arial', size=12, bold=True, color='FFFFFF')
        header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
        header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)

        cell_alignment = Alignment(horizontal='left', vertical='center', wrap_text=True)
        date_alignment = Alignment(horizontal='center', vertical='center')

        border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )

        for col in range(1, num_columns + 1):
            cell = ws.cell(row=1, column=col)
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = header_alignment
            cell.border = border

        # Columns holding dates are centred, everything else is left-aligned
        # with wrapping. The decision depends only on the header text.
        column_alignments = [
            date_alignment
            if ws.cell(row=1, column=col).value in ('Срок', 'Дата')
            else cell_alignment
            for col in range(1, num_columns + 1)
        ]

        for row in range(2, num_rows + 2):
            for col in range(1, num_columns + 1):
                cell = ws.cell(row=row, column=col)
                cell.border = border
                cell.alignment = column_alignments[col - 1]

        # Size each column to its longest value, capped at 80 characters.
        for col in ws.columns:
            col_letter = col[0].column_letter
            max_length = max(
                (len(str(cell.value)) for cell in col if cell.value is not None),
                default=0,
            )
            ws.column_dimensions[col_letter].width = min(max_length + 2, 80)

        # Keep the header row visible while scrolling.
        ws.freeze_panes = 'A2'

    def save(self, filename=None):
        """Save the workbook to disk and return the path that was written.

        Args:
            filename: target path; when omitted a timestamped file under
                ``output/`` is used.

        Returns:
            The path of the saved file.
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"output/tasks_{timestamp}.xlsx"

        # A bare file name has no directory part; makedirs("") would raise.
        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)

        self.wb.save(filename)
        print(f"✅ Excel файл сохранен: {filename}")
        return filename

    def save_to_buffer(self, buffer):
        """Write the workbook into ``buffer`` (a binary file-like object) and return it."""
        self.wb.save(buffer)
        return buffer
|
src/google_calendar.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.oauth2 import service_account
|
| 2 |
+
from googleapiclient.discovery import build
|
| 3 |
+
from googleapiclient.errors import HttpError
|
| 4 |
+
import datetime
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
class GoogleCalendarExporter:
    """Export parsed tasks to Google Calendar as all-day events."""

    def __init__(self, credentials_path='credentials/google-credentials.json', calendar_id='primary'):
        """Connect to Google Calendar with a service account.

        Args:
            credentials_path: path to the service-account JSON key file.
            calendar_id: target calendar ('primary' or a concrete calendar ID).

        Raises:
            FileNotFoundError: the key file does not exist.
            RuntimeError: the key was rejected or the API client could not
                be built.
        """
        self.credentials_path = credentials_path
        self.calendar_id = calendar_id
        self.service = None

        if not os.path.exists(credentials_path):
            print(f"❌ Файл с ключами не найден: {credentials_path}")
            print("💡 Убедитесь, что файл лежит в папке credentials/")
            # Raise instead of sys.exit(): this class is used inside a web
            # server, where exiting the interpreter would kill the service.
            raise FileNotFoundError(f"Google credentials file not found: {credentials_path}")

        self._authenticate()

    def _authenticate(self):
        """Build the Calendar API client from the service-account key.

        Raises:
            RuntimeError: loading the key or building the client failed.
        """
        try:
            # Full calendar scope: creating events requires write access.
            SCOPES = ['https://www.googleapis.com/auth/calendar']

            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=SCOPES
            )

            self.service = build('calendar', 'v3', credentials=credentials)
            print("✅ Успешная аутентификация в Google Calendar")

        except Exception as e:
            print(f"❌ Ошибка аутентификации: {e}")
            raise RuntimeError(f"Google Calendar authentication failed: {e}") from e

    def create_event_from_task(self, task):
        """Create one all-day calendar event for ``task``.

        Args:
            task: dict with the task data (number, summary, full_description,
                responsible, due_date, due_date_str). ``due_date`` must
                support ``strftime`` and adding a ``timedelta``.

        Returns:
            The link to the created event, or None when the task has no due
            date, the service is not initialised or the API call failed.
        """
        if not self.service:
            print("❌ Сервис не инициализирован")
            return None

        number = task.get('number', '?')

        if not task.get('due_date'):
            print(f"⚠️ Задача #{number} пропущена: нет даты")
            return None

        try:
            due_date = task['due_date']

            # All-day event: the API takes plain dates and treats the end
            # date as exclusive, so a one-day event ends on the next day.
            start_date = due_date.strftime('%Y-%m-%d')
            end_date = (due_date + datetime.timedelta(days=1)).strftime('%Y-%m-%d')

            # Prefer the short summary, fall back to the start of the text.
            summary = task.get('summary', '')
            if not summary:
                summary = task.get('full_description', '')[:50] + '...'

            description = "\n".join([
                f"📋 Задача #{number}",
                "",
                "📝 Полное описание:",
                f"{task.get('full_description', '')}",
                "",
                f"👤 Ответственный: {task.get('responsible', 'Не указан')}",
                "",
                f"📅 Срок: {task.get('due_date_str', '')}",
                "",
                "🔗 Создано автоматически парсером задач",
            ]).strip()

            event = {
                'summary': f"Задача #{number}: {summary}",
                'description': description,
                'start': {
                    'date': start_date,
                },
                'end': {
                    'date': end_date,
                },
                'reminders': {
                    'useDefault': True  # use the calendar's default reminders
                }
            }

            created_event = self.service.events().insert(
                calendarId=self.calendar_id,
                body=event
            ).execute()

            print(f"✅ Событие создано: {created_event.get('htmlLink')}")
            return created_event.get('htmlLink')

        except HttpError as e:
            print(f"❌ Ошибка API при создании события для задачи #{number}: {e}")
            return None
        except Exception as e:
            print(f"❌ Неожиданная ошибка для задачи #{number}: {e}")
            return None

    def create_events_from_tasks(self, tasks):
        """Create events for every task in ``tasks``.

        Args:
            tasks: list of task dicts (see create_event_from_task).

        Returns:
            A list with one dict per created event: ``task_number``,
            ``task_summary`` (first 30 characters) and ``event_link``.
            Tasks that could not be exported are left out.
        """
        results = []

        print(f"\n📅 Создание событий в календаре (ID: {self.calendar_id})...")

        for task in tasks:
            event_link = self.create_event_from_task(task)
            if event_link:
                results.append({
                    'task_number': task.get('number'),
                    # "or ''": the summary key may be present but None.
                    'task_summary': (task.get('summary') or '')[:30] + '...',
                    'event_link': event_link
                })

        print(f"\n✅ Создано {len(results)} событий из {len(tasks)} задач")
        return results

    def check_calendar_access(self):
        """Check calendar access by listing up to five upcoming events.

        Returns:
            True when the events could be listed, False when the service is
            not initialised or the API reported an error.
        """
        if not self.service:
            print("❌ Сервис не инициализирован")
            return False

        try:
            # Timezone-aware "now" in UTC, formatted as an RFC 3339 timestamp.
            now = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
            events_result = self.service.events().list(
                calendarId=self.calendar_id,
                timeMin=now,
                maxResults=5,
                singleEvents=True,
                orderBy='startTime'
            ).execute()

            events = events_result.get('items', [])

            if not events:
                print("📭 В календаре нет предстоящих событий")
            else:
                print("📅 Ближайшие события в календаре:")
                for event in events:
                    start = event['start'].get('dateTime', event['start'].get('date'))
                    print(f"   • {start}: {event.get('summary', 'Без названия')}")

            return True

        except HttpError as e:
            print(f"❌ Ошибка доступа к календарю: {e}")
            print("💡 Проверьте, что:")
            print("   1. Calendar API включен в Google Cloud Console")
            print("   2. Календарь расшарен на email сервисного аккаунта")
            print("   3. Calendar ID указан правильно")
            return False
|
src/google_sheets.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gspread
|
| 2 |
+
from google.oauth2.service_account import Credentials
|
| 3 |
+
from google.auth.exceptions import GoogleAuthError
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
class GoogleSheetsExporter:
|
| 10 |
+
"""
|
| 11 |
+
Класс для экспорта задач в Google Sheets (исправленная версия)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self, credentials_path='credentials/google-credentials.json'):
|
| 15 |
+
"""
|
| 16 |
+
Инициализация подключения к Google Sheets
|
| 17 |
+
"""
|
| 18 |
+
self.credentials_path = credentials_path
|
| 19 |
+
self.client = None
|
| 20 |
+
self.spreadsheet = None
|
| 21 |
+
|
| 22 |
+
# Проверяем наличие файла с ключами
|
| 23 |
+
if not os.path.exists(credentials_path):
|
| 24 |
+
print(f"❌ Файл с ключами не найден: {credentials_path}")
|
| 25 |
+
print("💡 Убедитесь, что файл лежит в папке credentials/")
|
| 26 |
+
sys.exit(1)
|
| 27 |
+
|
| 28 |
+
self._authenticate()
|
| 29 |
+
|
| 30 |
+
def _authenticate(self):
|
| 31 |
+
"""Аутентификация в Google Sheets API"""
|
| 32 |
+
try:
|
| 33 |
+
# Определяем права доступа
|
| 34 |
+
scope = [
|
| 35 |
+
'https://www.googleapis.com/auth/spreadsheets',
|
| 36 |
+
'https://www.googleapis.com/auth/drive'
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
# Загружаем ключи
|
| 40 |
+
credentials = Credentials.from_service_account_file(
|
| 41 |
+
self.credentials_path,
|
| 42 |
+
scopes=scope
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
# Авторизуемся
|
| 46 |
+
self.client = gspread.authorize(credentials)
|
| 47 |
+
print("✅ Успешная аутентификация в Google Sheets")
|
| 48 |
+
|
| 49 |
+
except Exception as e:
|
| 50 |
+
print(f"❌ Ошибка аутентификации: {e}")
|
| 51 |
+
sys.exit(1)
|
| 52 |
+
|
| 53 |
+
def use_existing_spreadsheet(self, spreadsheet_identifier):
|
| 54 |
+
"""
|
| 55 |
+
ИСПРАВЛЕНО: Открывает существующую таблицу (по URL, ID или названию)
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
spreadsheet_identifier: URL, ID или название таблицы
|
| 59 |
+
"""
|
| 60 |
+
try:
|
| 61 |
+
# Пробуем открыть по URL
|
| 62 |
+
if spreadsheet_identifier.startswith('https://'):
|
| 63 |
+
self.spreadsheet = self.client.open_by_url(spreadsheet_identifier)
|
| 64 |
+
print(f"✅ Открыта таблица по URL")
|
| 65 |
+
else:
|
| 66 |
+
# Пробуем открыть по ID или названию
|
| 67 |
+
try:
|
| 68 |
+
self.spreadsheet = self.client.open_by_key(spreadsheet_identifier)
|
| 69 |
+
except:
|
| 70 |
+
self.spreadsheet = self.client.open(spreadsheet_identifier)
|
| 71 |
+
|
| 72 |
+
print(f"✅ Таблица: {self.spreadsheet.title}")
|
| 73 |
+
return self.spreadsheet
|
| 74 |
+
|
| 75 |
+
except gspread.SpreadsheetNotFound:
|
| 76 |
+
print(f"❌ Таблица не найдена. Проверьте:")
|
| 77 |
+
print(f" 1. Правильно ли вы скопировали ссылку/ID")
|
| 78 |
+
print(f" 2. Расшарили ли таблицу на email сервисного аккаунта")
|
| 79 |
+
return None
|
| 80 |
+
except Exception as e:
|
| 81 |
+
print(f"❌ Ошибка при открытии таблицы: {e}")
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
def export_dataframe(self, df, sheet_name='Tasks', clear_sheet=True):
    """Write a DataFrame (header row plus data) into the opened spreadsheet.

    Args:
        df: DataFrame to export.
        sheet_name: worksheet title; the worksheet is created when missing.
        clear_sheet: clear an existing worksheet before writing.

    Returns:
        The worksheet that was written, or ``False`` when no spreadsheet is
        open or the export fails.
    """
    if self.spreadsheet is None:
        print("❌ Сначала откройте таблицу через use_existing_spreadsheet()")
        return False

    def _to_cell(value):
        # The Sheets API takes JSON, so anything that is not a plain
        # scalar (dates, timestamps, ...) is sent as text and missing
        # values become empty cells.
        if value is None:
            return ''
        if hasattr(value, 'item'):
            value = value.item()  # numpy scalar -> native Python value
        if isinstance(value, (str, bool, int)):
            return value
        if value != value:  # NaN / NaT are the only values unequal to themselves
            return ''
        if isinstance(value, float):
            return value
        return str(value)

    try:
        # Reuse the worksheet when it exists, otherwise create it
        try:
            worksheet = self.spreadsheet.worksheet(sheet_name)
            if clear_sheet:
                worksheet.clear()
                print(f"🧹 Лист '{sheet_name}' очищен")
        except gspread.WorksheetNotFound:
            worksheet = self.spreadsheet.add_worksheet(
                title=sheet_name,
                rows=max(100, len(df) + 10),
                cols=len(df.columns) + 5
            )
            print(f"📄 Создан новый лист: '{sheet_name}'")

        # Header row followed by the normalised data rows
        headers = df.columns.tolist()
        rows = [[_to_cell(value) for value in row] for row in df.values.tolist()]
        all_data = [headers] + rows

        # Keyword arguments keep the call independent of the positional
        # order of (range, values), which differs between gspread versions.
        worksheet.update(range_name='A1', values=all_data)
        print(f"✅ Записано {len(df)} строк в лист '{sheet_name}'")

        return worksheet

    except Exception as e:
        print(f"❌ Ошибка при экспорте: {e}")
        return False
|
| 122 |
+
|
| 123 |
+
def get_shareable_link(self):
    """Return the URL of the opened spreadsheet, or None when none is open."""
    return self.spreadsheet.url if self.spreadsheet else None
|
src/parser.py
ADDED
|
@@ -0,0 +1,653 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pdfplumber
|
| 2 |
+
import re
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from typing import List, Dict, Optional
|
| 5 |
+
import os
|
| 6 |
+
import subprocess
|
| 7 |
+
import shutil
|
| 8 |
+
|
| 9 |
+
class TaskParser:
    """Extract numbered tasks from meeting protocols (PDF, DOCX, DOC).

    Each task is a dict with the keys ``number``, ``full_description``,
    ``responsible``, ``due_date`` (a ``date`` or ``None``) and
    ``due_date_str`` (the deadline as text, or a status such as
    ``'Выполнено'``).
    """

    def __init__(self, file_path: str):
        """Remember the source file and set up the keyword tables."""
        self.file_path = file_path
        self.tasks = []
        self.file_extension = os.path.splitext(file_path)[1].lower()

        # ========== FIELD SETTINGS (SAFE TO TUNE) ==========
        # Keywords that introduce a deadline
        self.date_keywords = ['Срок', 'Дата', 'Дедлайн', 'Due', 'Выполнить до']

        # Keywords that introduce the responsible person
        self.resp_keywords = ['Отв.', 'Исполнитель', 'Ответственный', 'Исп.']

        # Free-text completion statuses (matched case-insensitively)
        self.status_keywords = ['выполнено', 'выполнен', 'сделано', 'готово']

        # Regex fragment for the separator between a keyword and its value
        self.separators = r'\s*(?:—|–|-|:)?\s*'

        # Markers that end the task section
        self.after_keywords = [
            'Протокол вёл',
            'Лист согласования',
            'Стр.',
            'Page',
            'Ознакомлены',
            'Подписи',
            'УТВЕРЖДАЮ',
            'СОГЛАСОВАНО',
            'От АО «ТАНЕКО»:',
            'От ООО «НТЦ Татнефть»:',
            'От ООО «ЭПИК»:'
        ]

        # Section headers that are skipped rather than treated as tasks
        self.before_keywords = [
            'СЛУШАЛИ:',
            'ВЫСТУПИЛИ:',
            'ДОКЛАДЫВАЛИ:',
            'ОБСУЖДАЛИ:',
            'ПОВЕСТКА ДНЯ:',
            'ПРИСУТСТВОВАЛИ:'
        ]
        # ====================================================

    # ------------------------------------------------------------------
    # Text extraction
    # ------------------------------------------------------------------

    def extract_text(self) -> str:
        """Return the document text; supports .pdf, .docx and .doc.

        Returns an empty string for unsupported extensions or when the
        extraction fails.
        """
        if self.file_extension == '.pdf':
            return self._extract_from_pdf()
        elif self.file_extension == '.docx':
            return self._extract_from_docx()
        elif self.file_extension == '.doc':
            return self._extract_from_doc()
        else:
            print(f"❌ Неподдерживаемый формат файла: {self.file_extension}")
            print(" Поддерживаются: .pdf, .docx, .doc")
            return ""

    def _extract_from_pdf(self) -> str:
        """Concatenate the text of every PDF page, one page per line block."""
        full_text = ""
        try:
            with pdfplumber.open(self.file_path) as pdf:
                for page in pdf.pages:
                    text = page.extract_text()
                    if text:
                        full_text += text + "\n"
            print(f"✅ Извлечено {len(full_text)} символов из PDF")
            return full_text
        except Exception as e:
            print(f"❌ Ошибка при чтении PDF: {e}")
            return ""

    def _extract_from_docx(self) -> str:
        """Return paragraphs and table rows of a .docx file as text lines.

        Paragraphs that belong to a numbered/bulleted list are prefixed with
        ``¶`` so the protocol parser can recognise them as task starts.
        Table rows are emitted as cells joined by `` | ``.
        """
        try:
            from docx import Document

            doc = Document(self.file_path)
            full_text = []

            for para in doc.paragraphs:
                text = para.text.strip()
                if not text:
                    continue
                try:
                    is_list_item = bool(para._element.xpath('.//w:numPr'))
                except Exception:
                    # Best effort: an unreadable numbering property just
                    # means the paragraph is kept without the list marker.
                    is_list_item = False
                full_text.append(f"¶ {text}" if is_list_item else text)

            for table in doc.tables:
                for row in table.rows:
                    row_text = [cell.text for cell in row.cells if cell.text.strip()]
                    if row_text:
                        full_text.append(' | '.join(row_text))

            result = '\n'.join(full_text)
            print(f"✅ Извлечено {len(result)} символов из Word документа (.docx)")
            return result

        except ImportError:
            print("❌ Библиотека python-docx не установлена")
            print(" Установите: pip install python-docx")
            return ""
        except Exception as e:
            print(f"❌ Ошибка при чтении Word документа: {e}")
            return ""

    def _extract_from_doc(self) -> str:
        """Return the text of a legacy .doc file via antiword or LibreOffice.

        ``antiword`` is tried first, then ``soffice --convert-to txt``.
        Returns an empty string when neither tool produces text.
        """
        if shutil.which('antiword'):
            try:
                result = subprocess.run(['antiword', self.file_path],
                                        capture_output=True, text=True)
                if result.returncode == 0:
                    print(f"✅ Извлечено {len(result.stdout)} символов из Word .doc файла")
                    return result.stdout
            except Exception as e:
                print(f"⚠️ Ошибка antiword: {e}")

        if shutil.which('soffice'):
            try:
                import tempfile

                # TemporaryDirectory removes the directory on every path,
                # including failed or partial conversions.
                with tempfile.TemporaryDirectory() as temp_dir:
                    result = subprocess.run([
                        'soffice', '--headless', '--convert-to', 'txt',
                        '--outdir', temp_dir, self.file_path
                    ], capture_output=True, text=True)

                    if result.returncode == 0:
                        # Swap only the real extension (any letter case);
                        # a plain replace('.doc', ...) also rewrote '.doc'
                        # inside the name and missed '.DOC'.
                        stem = os.path.splitext(os.path.basename(self.file_path))[0]
                        txt_file = os.path.join(temp_dir, stem + '.txt')

                        if os.path.exists(txt_file):
                            with open(txt_file, 'r', encoding='utf-8', errors='ignore') as f:
                                content = f.read()
                            print(f"✅ Извлечено {len(content)} символов из Word .doc файла (через LibreOffice)")
                            return content
            except Exception as e:
                print(f"⚠️ Ошибка при конвертации через LibreOffice: {e}")

        print("❌ Не удалось извлечь текст из .doc файла.")
        print(" Установите: brew install antiword")
        return ""

    # ------------------------------------------------------------------
    # Parsing entry point
    # ------------------------------------------------------------------

    def parse_tasks(self, text: str) -> List[Dict]:
        """Parse ``text`` into tasks, store them in ``self.tasks`` and return them.

        PDFs use the PDF parser. Other files use the protocol parser when a
        ``РЕШИЛИ:`` marker is found within the first 100 lines, and the plain
        numbered-list parser otherwise.
        """
        lines = text.split('\n')

        resheno_index = next(
            (idx for idx, line in enumerate(lines[:100]) if 'РЕШИЛИ:' in line),
            -1,
        )
        has_resheno = resheno_index != -1
        if has_resheno:
            print(f"✅ Найден маркер 'РЕШИЛИ:' в строке {resheno_index}")

        if self.file_extension == '.pdf':
            print("📄 PDF файл: использую простой парсинг")
            self.tasks = self._parse_pdf_simple(lines)
        elif has_resheno:
            print("📝 Word файл с РЕШИЛИ: использую парсинг протокола")
            self.tasks = self._parse_word_protocol(lines, resheno_index)
        else:
            print("📄 Простой список: использую базовый парсинг")
            self.tasks = self._parse_simple_list(lines)

        return self.tasks

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _new_task(number: int) -> Dict:
        """Return an empty task record with the given number."""
        return {
            'number': number,
            'full_description': '',
            'responsible': '',
            'due_date': None,
            'due_date_str': ''
        }

    @staticmethod
    def _parse_date(date_str: str):
        """Return a ``date`` for a DD.MM.YYYY string, or None if it is not a real date."""
        try:
            return datetime.strptime(date_str, '%d.%m.%Y').date()
        except ValueError:
            return None

    @staticmethod
    def _cut_at_stop_word(text: str, stop_words: List[str]) -> str:
        """Truncate ``text`` at the first stop word (in list order) it contains.

        The membership test and the split use the same, unmodified string, so
        a stop word that is found is always the one that cuts.
        """
        for stop_word in stop_words:
            if stop_word in text:
                return text.split(stop_word)[0].strip()
        return text

    @staticmethod
    def _strip_trailing_metadata(text: str) -> str:
        """Drop a trailing deadline/status fragment from a responsible name."""
        text = re.sub(r'\s+Срок.*$', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\s+Выполнено.*$', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\s+до\s+конца\s+года.*$', '', text, flags=re.IGNORECASE)
        return text

    def _responsible_stop_words(self, with_protocol: bool = True) -> List[str]:
        """Return the words after which a responsible name must end."""
        extra = ['Выполнено', 'Приложение', 'приложение']
        if with_protocol:
            extra.append('Протокол')
        return self.date_keywords + extra + self.status_keywords

    @staticmethod
    def _clean_pdf_description(parts: List[str]) -> str:
        """Join description lines and strip responsible/deadline metadata."""
        full_desc = re.sub(r'\s+', ' ', ' '.join(parts))

        # Deadlines go first: the dots of a DD.MM.YYYY date would otherwise
        # end the "Отв.: ... ." match early and leave half of the date
        # glued to the description.
        full_desc = re.sub(r'[;,\s]*Срок\s*[—–-]?\s*\d{2}\.\d{2}\.\d{4}', '', full_desc)
        full_desc = re.sub(r'[;,\s]*Срок\s*[—–-]?\s*до\s+конца\s+года', '', full_desc)

        full_desc = re.sub(r'[;,\s]*Отв\.:\s*[^\.]+?(?:\.|$)', '', full_desc)
        full_desc = re.sub(r'[;,\s]*Отв\.:\s*[^С]+?(?:\s+Срок|$)', '', full_desc)
        full_desc = re.sub(r'[;,\s]*Отв\.:\s*[^\n]+', '', full_desc)
        full_desc = re.sub(r'[;,\s]*С\b', '', full_desc)
        full_desc = re.sub(r'\s+', ' ', full_desc)
        full_desc = re.sub(r'\s*[;,]?\s*$', '', full_desc)
        return full_desc.strip()

    def _apply_deadline(self, task: Dict, line: str) -> None:
        """Update the deadline fields of ``task`` from a continuation line.

        A status keyword wins over "до конца года", which wins over an
        explicit ``Срок DD.MM.YYYY`` date. Lines without any of them leave the
        task untouched.
        """
        line_lower = line.lower()
        has_status = any(word in line_lower for word in self.status_keywords)
        has_year_end = 'до конца года' in line_lower
        if not ('Срок' in line or has_status or has_year_end):
            return

        if has_status:
            task['due_date_str'] = 'Выполнено'
        elif has_year_end:
            task['due_date_str'] = 'до конца года'
        else:
            date_match = re.search(r'Срок\s*[—–-]?\s*(\d{2}\.\d{2}\.\d{4})', line)
            if date_match:
                date_str = date_match.group(1).strip()
                task['due_date_str'] = date_str
                parsed = self._parse_date(date_str)
                if parsed is not None:
                    task['due_date'] = parsed

    def _deadline_after_keyword(self, line: str) -> Optional[tuple]:
        """Return ``(due_date_str, due_date)`` for the first date keyword in ``line``.

        ``due_date`` is None when the text after the keyword holds no valid
        DD.MM.YYYY date. Returns None when the line has no date keyword.
        """
        for keyword in self.date_keywords:
            if keyword in line:
                date_text = line.split(keyword)[1].strip()
                date_text = re.sub(r'^\s*[—–-]?\s*', '', date_text)
                due_date_str = re.sub(r'\s+', ' ', date_text)
                date_match = re.search(r'(\d{2}\.\d{2}\.\d{4})', date_text)
                due_date = self._parse_date(date_match.group(1)) if date_match else None
                return due_date_str, due_date
        return None

    @staticmethod
    def _is_marked_task_start(line: str) -> bool:
        """True for lines that start with ``N.``/``N<TAB>`` or the ``¶`` list marker."""
        return bool(re.match(r'^\d+[.\t]', line)) or line.startswith('¶')

    def _looks_like_task_text(self, line: str) -> bool:
        """True for long unmarked lines without responsible/date/section keywords."""
        if any(keyword in line for keyword in self.resp_keywords):
            return False
        if any(keyword in line for keyword in self.date_keywords):
            return False
        if any(keyword in line for keyword in self.before_keywords):
            return False
        return len(line) > 20

    # ------------------------------------------------------------------
    # PDF parser
    # ------------------------------------------------------------------

    def _parse_pdf_simple(self, lines: List[str]) -> List[Dict]:
        """Parse ``N. text`` tasks from PDF lines.

        Parsing starts after the first ``РЕШИЛИ:`` line (or at the top when
        there is none) and stops at the first line whose first 30 characters
        contain one of ``self.after_keywords``. Metadata is read from the
        continuation lines of a task and removed from its description.
        """
        tasks = []
        current_task = None
        current_description = []

        marker_index = next(
            (idx for idx, line in enumerate(lines) if 'РЕШИЛИ:' in line),
            -1,
        )
        start_index = marker_index + 1 if marker_index != -1 else 0

        for raw in lines[start_index:]:
            line = raw.strip()
            if not line:
                continue

            if any(keyword in line[:30] for keyword in self.after_keywords):
                break

            task_match = re.match(r'^(\d+)\.\s+(.*)', line)
            if task_match:
                if current_task:
                    current_task['full_description'] = self._clean_pdf_description(current_description)
                    tasks.append(current_task)
                current_task = self._new_task(int(task_match.group(1)))
                current_description = [task_match.group(2)]
            elif current_task:
                current_description.append(line)
                if 'Отв.:' in line:
                    self._apply_pdf_responsible(current_task, line)
                self._apply_deadline(current_task, line)

        if current_task:
            current_task['full_description'] = self._clean_pdf_description(current_description)
            tasks.append(current_task)

        return tasks

    def _apply_pdf_responsible(self, task: Dict, line: str) -> None:
        """Set ``task['responsible']`` from an ``Отв.: ...`` line."""
        resp_match = (
            re.search(r'Отв\.:\s*([^С]+?)(?:\s+Срок|$)', line)
            or re.search(r'Отв\.:\s*([^\n]+)', line)
        )
        if not resp_match:
            return

        responsible = resp_match.group(1).strip()
        responsible = self._cut_at_stop_word(responsible, self._responsible_stop_words())
        responsible = self._strip_trailing_metadata(responsible)
        task['responsible'] = re.sub(r'\s+', ' ', responsible)

    # ------------------------------------------------------------------
    # Word protocol parser
    # ------------------------------------------------------------------

    def _parse_word_protocol(self, all_lines: List[str], start_idx: int) -> List[Dict]:
        """Parse the tasks that follow the first ``РЕШИЛИ:`` line of a protocol.

        Args:
            all_lines: every line of the document.
            start_idx: kept for interface compatibility; the marker is
                located by scanning ``all_lines``.

        Returns:
            The parsed tasks, or an empty list when there is no marker.
        """
        marker_pos = next(
            (idx for idx, line in enumerate(all_lines) if 'РЕШИЛИ:' in line),
            -1,
        )
        if marker_pos == -1:
            return []

        task_lines = self._collect_protocol_lines(all_lines, marker_pos + 1)
        stop_words = self._responsible_stop_words()

        tasks = []
        i = 0
        task_counter = 1

        while i < len(task_lines):
            line = task_lines[i]
            task_number = None
            description = None

            match = re.match(r'^(\d+)[.\t]\s*(.*)', line)
            if match:
                task_number = int(match.group(1))
                description = match.group(2)
            elif line.startswith('¶'):
                task_number = task_counter
                description = re.sub(r'^¶\s*', '', line)
            elif self._looks_like_task_text(line):
                task_number = task_counter
                description = line

            if not description:
                i += 1
                continue

            i += 1
            responsible = ""
            due_date_str = ""
            due_date = None
            collected_resp = False
            collected_date = False

            while i < len(task_lines) and not (collected_resp and collected_date):
                line_start = i
                current = task_lines[i].strip()

                if not current:
                    i += 1
                    continue

                if self._is_marked_task_start(current) or self._looks_like_task_text(current):
                    break

                if not collected_resp:
                    resp_keyword = next(
                        (k for k in self.resp_keywords if k in current), None
                    )
                    if resp_keyword is not None:
                        resp_text = current.split(resp_keyword)[1].strip()
                        resp_text = self._cut_at_stop_word(resp_text, stop_words)
                        resp_text = self._strip_trailing_metadata(resp_text)
                        responsible = re.sub(r'\s+', ' ', resp_text)
                        responsible = re.sub(r'^:\s*', '', responsible)
                        collected_resp = True

                        # The deadline may sit on the same line
                        found = self._deadline_after_keyword(current)
                        if found is not None:
                            due_date_str = found[0]
                            if found[1] is not None:
                                due_date = found[1]
                            collected_date = True
                        i += 1

                if not collected_date and i < len(task_lines):
                    candidate = task_lines[i].strip()
                    # When the responsible line was just consumed, candidate
                    # is the following line: never read a deadline or status
                    # out of a line that visibly starts the next task.
                    if i == line_start or not self._is_marked_task_start(candidate):
                        candidate_lower = candidate.lower()
                        if any(word in candidate_lower for word in self.status_keywords):
                            due_date_str = 'Выполнено'
                            collected_date = True
                            i += 1
                        elif 'до конца года' in candidate_lower:
                            due_date_str = 'до конца года'
                            collected_date = True
                            i += 1
                        else:
                            found = self._deadline_after_keyword(candidate)
                            if found is not None:
                                due_date_str = found[0]
                                if found[1] is not None:
                                    due_date = found[1]
                                collected_date = True
                                i += 1

                # Always make progress: a line that yielded nothing is
                # skipped, otherwise a short trailing note after the
                # responsible line would loop forever.
                if i == line_start:
                    i += 1

            tasks.append({
                'number': task_number,
                'full_description': description,
                'responsible': responsible,
                'due_date': due_date,
                'due_date_str': due_date_str
            })
            task_counter += 1

        return tasks

    def _collect_protocol_lines(self, all_lines: List[str], first: int) -> List[str]:
        """Return the stripped, non-empty lines of the task section.

        Collection starts at the first marked task (``N.`` or ``¶``) at or
        after index ``first``, skips bare page numbers and section headers,
        and stops at a line whose first 30 characters contain an end marker.
        """
        task_lines = []
        started = False

        for raw in all_lines[first:]:
            line = raw.strip()

            if any(keyword in line[:30] for keyword in self.after_keywords):
                break
            if re.match(r'^\d+$', line):
                continue  # bare page number
            if any(keyword in line for keyword in self.before_keywords):
                continue
            if not started and self._is_marked_task_start(line):
                started = True
            if started and line:
                task_lines.append(line)

        if task_lines:
            # An end marker further than 30 characters into the last line
            # is cut off together with everything after it.
            last_line = task_lines[-1]
            for keyword in self.after_keywords:
                if keyword in last_line:
                    task_lines[-1] = last_line.split(keyword)[0].strip()
                    break
            if not task_lines[-1]:
                task_lines.pop()

        return task_lines

    # ------------------------------------------------------------------
    # Plain numbered list parser
    # ------------------------------------------------------------------

    def _parse_simple_list(self, lines: List[str]) -> List[Dict]:
        """Parse a plain ``N. text`` list; continuation lines carry metadata.

        The description keeps every continuation line as-is (whitespace
        normalised); metadata is only copied into the task fields.
        """
        tasks = []
        current_task = None
        current_description = []

        for raw in lines:
            line = raw.strip()
            if not line:
                continue

            task_match = re.match(r'^(\d+)\.\s+(.*)', line)
            if task_match:
                if current_task:
                    current_task['full_description'] = re.sub(r'\s+', ' ', ' '.join(current_description))
                    tasks.append(current_task)
                current_task = self._new_task(int(task_match.group(1)))
                current_description = [task_match.group(2)]
            elif current_task:
                current_description.append(line)
                self._apply_deadline(current_task, line)
                self._apply_simple_responsible(current_task, line)

        if current_task:
            current_task['full_description'] = re.sub(r'\s+', ' ', ' '.join(current_description))
            tasks.append(current_task)

        return tasks

    def _apply_simple_responsible(self, task: Dict, line: str) -> None:
        """Set ``task['responsible']`` from the first responsible keyword in ``line``."""
        stop_words = self._responsible_stop_words(with_protocol=False)

        for keyword in self.resp_keywords:
            if keyword not in line:
                continue

            # self.separators also accepts ':' so "Отв.: Иванов" does not
            # yield a name that starts with a colon.
            prefix = re.escape(keyword) + self.separators
            resp_match = (
                re.search(prefix + r'([^С]+?)(?:\s+Срок|$)', line)
                or re.search(prefix + r'([^\n]+)', line)
            )
            if resp_match:
                responsible = resp_match.group(1).strip()
                responsible = self._cut_at_stop_word(responsible, stop_words)
                responsible = self._strip_trailing_metadata(responsible)
                task['responsible'] = re.sub(r'\s+', ' ', responsible)
                break

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------

    def print_tasks(self):
        """Print a short human-readable summary of ``self.tasks``."""
        if not self.tasks:
            print("❌ Задачи не найдены")
            return

        print(f"\n📋 Найдено задач: {len(self.tasks)}\n")
        print("=" * 80)

        for task in self.tasks:
            description = task['full_description']
            # Only mark the preview as shortened when it really is
            preview = description[:100] + ('...' if len(description) > 100 else '')
            print(f"Задача #{task['number']}")
            print(f"📝 Описание: {preview}")
            print(f"👤 Ответственный: {task['responsible'] or '❌ НЕТ'}")
            print(f"📅 Срок: {task['due_date_str'] or '❌ НЕТ'}")
            print("-" * 40)

    def to_dataframe(self):
        """Return ``self.tasks`` as a pandas DataFrame, one row per task.

        The 'Не указан' defaults only apply to task dicts that lack the key;
        the parsers in this class always set both keys (possibly to '').
        """
        import pandas as pd

        data = [
            {
                '№': task['number'],
                'Описание': task['full_description'],
                'Ответственный': task.get('responsible', 'Не указан'),
                'Срок': task.get('due_date_str', 'Не указан'),
                'Дата (для сортировки)': task.get('due_date')
            }
            for task in self.tasks
        ]

        df = pd.DataFrame(data)
        return df
|
src/summarizer.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
+
import re
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
class TaskSummarizer:
|
| 7 |
+
def __init__(self, model_name="cointegrated/rut5-base-absum"):
    """Load the tokenizer and seq2seq model used for summarization.

    The model is moved to CUDA when available (CPU otherwise) and switched
    to evaluation mode. Loading errors are reported and re-raised.
    """
    device_name = "cuda" if torch.cuda.is_available() else "cpu"

    self.model_name = model_name
    self.device = torch.device(device_name)
    self.tokenizer = None
    self.model = None

    print(f"🔄 Загрузка модели {model_name}...")
    print(f"📱 Устройство: {self.device}")

    try:
        # Tokenizer first, then the model itself
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()  # inference mode, not training
    except Exception as e:
        print(f"❌ Ошибка при загрузке модели: {e}")
        print("💡 Попробуйте выполнить: pip install --upgrade transformers torch")
        raise
    else:
        print("✅ Модель успешно загружена!")
|
| 30 |
+
|
| 31 |
+
def summarize(self, text: str, max_length: int = 50, min_length: int = 10) -> str:
|
| 32 |
+
"""
|
| 33 |
+
Создает краткую суммаризацию текста задачи
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
text: Полный текст задачи
|
| 37 |
+
max_length: Максимальная длина суммаризации
|
| 38 |
+
min_length: Минимальная длина суммаризации
|
| 39 |
+
|
| 40 |
+
Returns:
|
| 41 |
+
Краткое описание задачи
|
| 42 |
+
"""
|
| 43 |
+
if not text or len(text) < 20:
|
| 44 |
+
return text
|
| 45 |
+
|
| 46 |
+
try:
|
| 47 |
+
# Очищаем текст от лишних символов
|
| 48 |
+
text = self._clean_text(text)
|
| 49 |
+
|
| 50 |
+
# Токенизируем входной текст
|
| 51 |
+
inputs = self.tokenizer(
|
| 52 |
+
text,
|
| 53 |
+
max_length=512,
|
| 54 |
+
truncation=True,
|
| 55 |
+
return_tensors="pt"
|
| 56 |
+
).to(self.device)
|
| 57 |
+
|
| 58 |
+
# Генерируем суммаризацию
|
| 59 |
+
with torch.no_grad(): # Отключаем вычисление градиентов для экономии памяти
|
| 60 |
+
summary_ids = self.model.generate(
|
| 61 |
+
inputs.input_ids,
|
| 62 |
+
max_length=max_length,
|
| 63 |
+
min_length=min_length,
|
| 64 |
+
num_beams=4, # Поиск с лучом для лучшего качества
|
| 65 |
+
length_penalty=2.0, # Штраф за длину
|
| 66 |
+
early_stopping=True,
|
| 67 |
+
no_repeat_ngram_size=3 # Избегаем повторений
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Декодируем результат
|
| 71 |
+
summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
| 72 |
+
|
| 73 |
+
# Постобработка
|
| 74 |
+
summary = self._postprocess_summary(summary)
|
| 75 |
+
|
| 76 |
+
return summary
|
| 77 |
+
|
| 78 |
+
except Exception as e:
|
| 79 |
+
print(f"⚠️ Ошибка при суммаризации: {e}")
|
| 80 |
+
# Возвращаем первые 100 символов как запасной вариант
|
| 81 |
+
return text[:100] + "..."
|
| 82 |
+
|
| 83 |
+
def _clean_text(self, text: str) -> str:
|
| 84 |
+
"""Очищает текст от лишних символов"""
|
| 85 |
+
# Удаляем номер задачи в начале (если есть)
|
| 86 |
+
text = re.sub(r'^\d+\.\s*', '', text)
|
| 87 |
+
|
| 88 |
+
# Удаляем информацию об ответственном и сроке
|
| 89 |
+
text = re.sub(r'Отв\.:.*?Срок\s*-\s*\d{2}\.\d{2}\.\d{4}', '', text)
|
| 90 |
+
text = re.sub(r'Отв\.:.*$', '', text, flags=re.MULTILINE)
|
| 91 |
+
text = re.sub(r'Срок\s*-\s*\d{2}\.\d{2}\.\d{4}', '', text)
|
| 92 |
+
|
| 93 |
+
# Удаляем лишние пробелы
|
| 94 |
+
text = re.sub(r'\s+', ' ', text)
|
| 95 |
+
|
| 96 |
+
return text.strip()
|
| 97 |
+
|
| 98 |
+
def _postprocess_summary(self, summary: str) -> str:
|
| 99 |
+
"""Постобработка сгенерированной суммаризации"""
|
| 100 |
+
# Убираем лишние пробелы
|
| 101 |
+
summary = re.sub(r'\s+', ' ', summary)
|
| 102 |
+
|
| 103 |
+
# Убираем точку в конце, если её нет
|
| 104 |
+
if summary and not summary.endswith(('.', '!', '?')):
|
| 105 |
+
summary += '.'
|
| 106 |
+
|
| 107 |
+
# Делаем первую букву заглавной
|
| 108 |
+
if summary:
|
| 109 |
+
summary = summary[0].upper() + summary[1:]
|
| 110 |
+
|
| 111 |
+
return summary
|
| 112 |
+
|
| 113 |
+
def summarize_batch(self, texts, max_length=50, min_length=10):
|
| 114 |
+
"""
|
| 115 |
+
Суммаризация нескольких текстов (для эффективности)
|
| 116 |
+
"""
|
| 117 |
+
summaries = []
|
| 118 |
+
for text in texts:
|
| 119 |
+
summary = self.summarize(text, max_length, min_length)
|
| 120 |
+
summaries.append(summary)
|
| 121 |
+
return summaries
|
| 122 |
+
|
web/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
web/icons/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
web/icons/calendarLogo.svg
ADDED
|
|
web/icons/favicon.svg
ADDED
|
|
web/icons/sheetsLogo-svgrepo-com.svg
ADDED
|
|
web/icons/titleIcon.svg
ADDED
|
|
web/icons/upload.svg
ADDED
|
|
web/index.html
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="ru">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Парсер задач</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@100..900&display=swap" rel="stylesheet">
|
| 10 |
+
<link rel="shortcut icon" href="/task-parser/web/icons/favicon.svg" type="image/svg+xml">
|
| 11 |
+
<link rel="stylesheet" href="/task-parser/web/style.css">
|
| 12 |
+
</head>
|
| 13 |
+
<body>
|
| 14 |
+
<div class="container">
|
| 15 |
+
<header>
|
| 16 |
+
<div class="title">
|
| 17 |
+
<img src="/task-parser/web/icons/titleIcon.svg" alt="">
|
| 18 |
+
<h1>Твой парсер</h1>
|
| 19 |
+
</div>
|
| 20 |
+
<p>Загрузи PDF или Word документ, чтобы извлечь все задачи</p>
|
| 21 |
+
</header>
|
| 22 |
+
|
| 23 |
+
<main>
|
| 24 |
+
<div id="dropZone" class="upload-area">
|
| 25 |
+
<div class="upload-item">
|
| 26 |
+
<img src="/task-parser/web/icons/upload.svg" alt="">
|
| 27 |
+
<div class="upload-text">Перетащи сюда файлы</div>
|
| 28 |
+
<div class="upload-advice">или кликни для выбора</div>
|
| 29 |
+
</div>
|
| 30 |
+
</div>
|
| 31 |
+
|
| 32 |
+
<input type="file" id="fileInput" style="display:none" multiple accept=".pdf,.docx,.doc">
|
| 33 |
+
|
| 34 |
+
<div id="selectedFiles" class="selected-files" style="display:none">
|
| 35 |
+
<strong>Выбрано файлов:</strong> <span id="filesCount">0</span>
|
| 36 |
+
<div id="filesList" class="files-list"></div>
|
| 37 |
+
</div>
|
| 38 |
+
|
| 39 |
+
<div class="upload-choice">
|
| 40 |
+
<div class="option-card">
|
| 41 |
+
<div class="option-choice">
|
| 42 |
+
<img src="/task-parser/web/icons/sheetsLogo-svgrepo-com.svg" alt="">
|
| 43 |
+
<h3>Google Sheets</h3>
|
| 44 |
+
</div>
|
| 45 |
+
<label>
|
| 46 |
+
<input type="checkbox" id="exportSheets"> Экспортировать
|
| 47 |
+
</label>
|
| 48 |
+
<input type="text" id="sheetsUrl" class="option-input" placeholder="URL таблицы" disabled>
|
| 49 |
+
</div>
|
| 50 |
+
<div class="option-card">
|
| 51 |
+
<div class="option-choice">
|
| 52 |
+
<img src="/task-parser/web/icons/calendarLogo.svg" alt="">
|
| 53 |
+
<h3>Google Calendar</h3>
|
| 54 |
+
</div>
|
| 55 |
+
<label>
|
| 56 |
+
<input type="checkbox" id="exportCalendar"> Экспортировать
|
| 57 |
+
</label>
|
| 58 |
+
<input type="text" id="calendarId" class="option-input" placeholder="ID календаря" disabled>
|
| 59 |
+
</div>
|
| 60 |
+
</div>
|
| 61 |
+
|
| 62 |
+
<button id="processBtn" class="btn" disabled>Обработать документы</button>
|
| 63 |
+
</main>
|
| 64 |
+
|
| 65 |
+
<section>
|
| 66 |
+
<div id="loader" class="loader"></div>
|
| 67 |
+
|
| 68 |
+
<div id="results" class="results">
|
| 69 |
+
<h2>Результаты обработки</h2>
|
| 70 |
+
<div id="stats" class="stats"></div>
|
| 71 |
+
<div class="table-wrapper">
|
| 72 |
+
<table id="tasksTable">
|
| 73 |
+
<thead>
|
| 74 |
+
<tr>
|
| 75 |
+
<th>№</th>
|
| 76 |
+
<th>Краткое описание</th>
|
| 77 |
+
<th>Описание</th>
|
| 78 |
+
<th>Ответственный</th>
|
| 79 |
+
<th>Срок</th>
|
| 80 |
+
</tr>
|
| 81 |
+
</thead>
|
| 82 |
+
<tbody id="tasksBody"></tbody>
|
| 83 |
+
</table>
|
| 84 |
+
</div>
|
| 85 |
+
<button id="downloadBtn" class="download-btn" style="display:none">Скачать Excel</button>
|
| 86 |
+
</div>
|
| 87 |
+
</section>
|
| 88 |
+
</div>
|
| 89 |
+
|
| 90 |
+
<script src="/task-parser/web/script.js"></script>
|
| 91 |
+
</body>
|
| 92 |
+
</html>
|
web/script.js
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Cached references to the page elements this script reads or updates.
const dropZone = document.getElementById('dropZone');
const fileInput = document.getElementById('fileInput');
const selectedFilesDiv = document.getElementById('selectedFiles');
const filesCountSpan = document.getElementById('filesCount');
const filesListDiv = document.getElementById('filesList');
const processBtn = document.getElementById('processBtn');
const loader = document.getElementById('loader');
const resultsDiv = document.getElementById('results');
const tasksBody = document.getElementById('tasksBody');
const statsDiv = document.getElementById('stats');
const downloadBtn = document.getElementById('downloadBtn');
const exportSheets = document.getElementById('exportSheets');
const sheetsUrl = document.getElementById('sheetsUrl');
const exportCalendar = document.getElementById('exportCalendar');
const calendarId = document.getElementById('calendarId');

// Files currently queued for upload.
let selectedFiles = [];
// Base64 Excel payload from the most recent successful response, if any.
let excelData = null;
|
| 19 |
+
|
| 20 |
+
/**
 * Re-render the list of queued files and sync the "process" button state.
 *
 * With no files queued the panel is hidden and the button disabled. Otherwise
 * one row per file is rendered with a remove button.
 */
function updateFilesList() {
    if (selectedFiles.length === 0) {
        selectedFilesDiv.style.display = 'none';
        processBtn.disabled = true;
        return;
    }

    selectedFilesDiv.style.display = 'block';
    filesCountSpan.textContent = selectedFiles.length;

    // Build rows as DOM nodes: file names are user-controlled, so they must go
    // through textContent and never be concatenated into innerHTML.
    const fragment = document.createDocumentFragment();
    selectedFiles.forEach(function(file, index) {
        const row = document.createElement('div');
        row.className = 'file-item';

        const label = document.createElement('span');
        label.textContent = '📄 ' + file.name;

        const removeButton = document.createElement('button');
        removeButton.textContent = '✕';
        removeButton.addEventListener('click', function() {
            window.removeFile(index);
        });

        row.appendChild(label);
        row.appendChild(removeButton);
        fragment.appendChild(row);
    });

    filesListDiv.textContent = '';
    filesListDiv.appendChild(fragment);
    processBtn.disabled = false;
}
|
| 36 |
+
|
| 37 |
+
// Drop the queued file at the given position and refresh the list.
// Exposed on window so it can be called from markup-level handlers.
window.removeFile = (index) => {
    selectedFiles.splice(index, 1);
    updateFilesList();
};
|
| 41 |
+
|
| 42 |
+
/**
 * Queue the supported files from a FileList or array.
 *
 * Only .pdf, .docx and .doc files are kept (extension check is
 * case-insensitive). A file is treated as a duplicate when its name and size
 * match one already queued. If no file is supported, an alert is shown and
 * nothing changes.
 */
function addFiles(files) {
    const allowedExtensions = ['.pdf', '.docx', '.doc'];

    const accepted = Array.from(files).filter(function(candidate) {
        const extension = '.' + candidate.name.split('.').pop().toLowerCase();
        return allowedExtensions.includes(extension);
    });

    if (accepted.length === 0) {
        alert('Поддерживаются только PDF, DOCX, DOC');
        return;
    }

    accepted.forEach(function(candidate) {
        const alreadyQueued = selectedFiles.some(function(queued) {
            return queued.name === candidate.name && queued.size === candidate.size;
        });
        if (!alreadyQueued) {
            selectedFiles.push(candidate);
        }
    });

    updateFilesList();
}
|
| 71 |
+
|
| 72 |
+
// Clicking the drop zone opens the hidden native file picker.
dropZone.addEventListener('click', () => {
    fileInput.click();
});

// preventDefault on dragover is what allows the element to receive a drop.
dropZone.addEventListener('dragover', (event) => {
    event.preventDefault();
});

dropZone.addEventListener('drop', (event) => {
    event.preventDefault();
    const dropped = event.dataTransfer.files;
    if (dropped.length) {
        addFiles(dropped);
    }
});

fileInput.addEventListener('change', (event) => {
    const picked = event.target.files;
    if (picked.length) {
        addFiles(picked);
    }
    // Reset so that picking the same file again still fires "change".
    fileInput.value = '';
});

// Each export target's text field is editable only while its checkbox is ticked.
exportSheets.addEventListener('change', () => {
    sheetsUrl.disabled = !exportSheets.checked;
});

exportCalendar.addEventListener('change', () => {
    calendarId.disabled = !exportCalendar.checked;
});
|
| 101 |
+
|
| 102 |
+
/**
 * Render the statistics cards and the task table, then reveal the results panel.
 *
 * @param {Array<Object>} tasks - Tasks to list; reads number, summary,
 *     full_description, responsible and due_date_str from each.
 * @param {Object} stats - Reads total, with_responsible and with_date.
 * @param {Array} [filesInfo] - When non-empty, its length is shown as the
 *     processed-file count.
 */
function displayResults(tasks, stats, filesInfo) {
    // Task text comes from uploaded documents, so every value is escaped
    // before being interpolated into innerHTML.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // Cut text to `limit` characters, marking the cut with an ellipsis.
    function clip(text, limit) {
        return text.length > limit ? text.substring(0, limit) + '...' : text;
    }

    function statCard(value, label) {
        return '<div class="stat-card"><div class="stat-value">' + escapeHtml(value) +
            '</div><div class="stat-label">' + label + '</div></div>';
    }

    let statsHtml = statCard(stats.total, 'Всего задач');
    statsHtml += statCard(stats.with_responsible, 'С ответственным');
    statsHtml += statCard(stats.with_date, 'С датой');
    if (filesInfo && filesInfo.length) {
        statsHtml += statCard(filesInfo.length, 'Обработано файлов');
    }
    statsDiv.innerHTML = statsHtml;

    let tasksHtml = '';
    for (let i = 0; i < tasks.length; i++) {
        const task = tasks[i];
        const fullDescription = task.full_description || '';
        // A provided summary is shown as is; the ellipsis is added only when
        // the summary is a truncated description.
        const summary = task.summary || clip(fullDescription, 80);
        const description = clip(fullDescription, 100);

        tasksHtml += '<tr>';
        tasksHtml += '<td>' + escapeHtml(task.number) + '</td>';
        tasksHtml += '<td>' + escapeHtml(summary) + '</td>';
        tasksHtml += '<td>' + escapeHtml(description) + '</td>';
        tasksHtml += '<td>' + escapeHtml(task.responsible || '-') + '</td>';
        tasksHtml += '<td>' + escapeHtml(task.due_date_str || '-') + '</td>';
        tasksHtml += '</tr>';
    }
    tasksBody.innerHTML = tasksHtml;
    resultsDiv.style.display = 'block';
}
|
| 127 |
+
|
| 128 |
+
/**
 * Show the download button and wire it to save the Excel file.
 *
 * @param {string} data - Base64-encoded .xlsx content; a falsy value is a no-op.
 */
function setupDownload(data) {
    if (!data) {
        return;
    }

    const twoDigits = (value) => String(value).padStart(2, '0');

    downloadBtn.style.display = 'inline-block';
    downloadBtn.onclick = () => {
        try {
            // Decode base64 into raw bytes for the Blob.
            const bytes = Uint8Array.from(atob(data), (ch) => ch.charCodeAt(0));
            const blob = new Blob([bytes], { type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' });
            const url = URL.createObjectURL(blob);

            // Local-time stamp; "-" replaces ":" to keep the file name portable.
            const now = new Date();
            const datePart = [now.getFullYear(), twoDigits(now.getMonth() + 1), twoDigits(now.getDate())].join('-');
            const timePart = [twoDigits(now.getHours()), twoDigits(now.getMinutes()), twoDigits(now.getSeconds())].join('-');

            const link = document.createElement('a');
            link.href = url;
            link.download = 'tasks_' + datePart + 'T' + timePart + '.xlsx';
            link.click();
            URL.revokeObjectURL(url);
        } catch (err) {
            console.error('Ошибка скачивания:', err);
            alert('Ошибка при скачивании файла');
        }
    };
}
|
| 159 |
+
|
| 160 |
+
// Upload the queued files with the export options to /parse-batch and render
// the response. The button and loader are restored whatever the outcome.
processBtn.addEventListener('click', async () => {
    if (selectedFiles.length === 0) {
        return;
    }

    processBtn.disabled = true;
    loader.style.display = 'block';
    resultsDiv.style.display = 'none';
    downloadBtn.style.display = 'none';

    const formData = new FormData();
    selectedFiles.forEach((file) => {
        formData.append('files', file);
    });
    formData.append('export_to_sheets', exportSheets.checked);
    formData.append('export_to_calendar', exportCalendar.checked);
    formData.append('sheets_url', sheetsUrl.value);
    formData.append('calendar_id', calendarId.value);

    // Debug aid: log every field that is about to be sent.
    console.log('Отправляю FormData:');
    for (const [key, value] of formData.entries()) {
        console.log(key, '=', value);
    }

    console.log('Отправляю:', {
        files: selectedFiles.length,
        export_to_calendar: exportCalendar.checked,
        calendar_id: calendarId.value
    });

    try {
        const response = await fetch('/parse-batch', { method: 'POST', body: formData });
        console.log('Ответ получен, статус:', response.status);

        if (!response.ok) {
            throw new Error('HTTP ошибка: ' + response.status);
        }

        const data = await response.json();
        console.log('Данные:', data);

        if (!data.success) {
            alert('Ошибка: ' + (data.error || 'Неизвестная ошибка'));
            return; // the finally block below still runs
        }

        if (data.excel_base64) {
            excelData = data.excel_base64;
            setupDownload(excelData);
        }
        displayResults(data.tasks, data.statistics, data.files);

        // calendar_export is either 'success' or an error message.
        const calendarStatus = data.calendar_export;
        if (calendarStatus === 'success') {
            alert('Задачи добавлены в Google Calendar');
        } else if (calendarStatus) {
            alert('Ошибка Calendar: ' + calendarStatus);
        }
    } catch (err) {
        console.error('Ошибка:', err);
        alert('Ошибка соединения: ' + err.message + '\n\nПроверь, что сервер запущен (python3 backend.py)');
    } finally {
        processBtn.disabled = false;
        loader.style.display = 'none';
    }
});
|
web/style.css
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
* {
|
| 2 |
+
margin: 0;
|
| 3 |
+
padding: 0;
|
| 4 |
+
box-sizing: border-box;
|
| 5 |
+
}
|
| 6 |
+
|
| 7 |
+
body {
|
| 8 |
+
font-family: 'Roboto', sans-serif;
|
| 9 |
+
background: linear-gradient(135deg, #6b6bae 0%, #192955 100%);
|
| 10 |
+
color: white;
|
| 11 |
+
padding: 20px;
|
| 12 |
+
min-height: 100vh;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.container {
|
| 16 |
+
max-width: 1200px;
|
| 17 |
+
margin: 0 auto;
|
| 18 |
+
width: 100%;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
.title {
|
| 22 |
+
display: flex;
|
| 23 |
+
justify-content: center;
|
| 24 |
+
align-items: center;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
.title img {
|
| 28 |
+
height: 45px;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
h1 {
|
| 32 |
+
text-align: center;
|
| 33 |
+
margin-bottom: 10px;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
p {
|
| 37 |
+
text-align: center;
|
| 38 |
+
margin-bottom: 30px;
|
| 39 |
+
opacity: 0.9;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.upload-area {
|
| 43 |
+
border: 3px dashed #667eea;
|
| 44 |
+
border-radius: 15px;
|
| 45 |
+
background: #2a2a3a;
|
| 46 |
+
text-align: center;
|
| 47 |
+
padding: 50px;
|
| 48 |
+
cursor: pointer;
|
| 49 |
+
transition: all 0.4s ease;
|
| 50 |
+
margin-bottom: 20px;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
.upload-area:hover {
|
| 54 |
+
background: #333344;
|
| 55 |
+
border-color: #764ba2;
|
| 56 |
+
}
|
| 57 |
+
.upload-area img {
|
| 58 |
+
max-width: 100%;
|
| 59 |
+
height: 90px;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.upload-text {
|
| 63 |
+
font-size: 18px;
|
| 64 |
+
margin-bottom: 10px;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.upload-advice {
|
| 68 |
+
font-size: 13px;
|
| 69 |
+
opacity: 0.6;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.selected-files {
|
| 73 |
+
background: #1e2a1e;
|
| 74 |
+
padding: 15px;
|
| 75 |
+
border-radius: 10px;
|
| 76 |
+
margin-bottom: 20px;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.files-list {
|
| 80 |
+
margin-top: 10px;
|
| 81 |
+
max-height: 120px;
|
| 82 |
+
overflow-y: auto;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.file-item {
|
| 86 |
+
background: #2a3a2a;
|
| 87 |
+
padding: 8px 12px;
|
| 88 |
+
border-radius: 8px;
|
| 89 |
+
margin-bottom: 5px;
|
| 90 |
+
display: flex;
|
| 91 |
+
justify-content: space-between;
|
| 92 |
+
align-items: center;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.file-item button {
|
| 96 |
+
background: #c62828;
|
| 97 |
+
color: white;
|
| 98 |
+
border: none;
|
| 99 |
+
border-radius: 50%;
|
| 100 |
+
width: 24px;
|
| 101 |
+
height: 24px;
|
| 102 |
+
cursor: pointer;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
.upload-choice {
|
| 106 |
+
display: flex;
|
| 107 |
+
gap: 20px;
|
| 108 |
+
margin-bottom: 20px;
|
| 109 |
+
flex-wrap: wrap;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.option-card {
|
| 113 |
+
flex: 1;
|
| 114 |
+
background: #2a2a3a;
|
| 115 |
+
padding: 20px;
|
| 116 |
+
border-radius: 10px;
|
| 117 |
+
min-width: 250px;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
.option-choice {
|
| 121 |
+
display: flex;
|
| 122 |
+
gap: 5px;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.option-choice img {
|
| 126 |
+
max-width: 100%;
|
| 127 |
+
height: 30px;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.option-card h3 {
|
| 131 |
+
margin-bottom: 15px;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.option-input {
|
| 135 |
+
width: 100%;
|
| 136 |
+
padding: 10px;
|
| 137 |
+
margin-top: 10px;
|
| 138 |
+
border-radius: 8px;
|
| 139 |
+
border: none;
|
| 140 |
+
background: #1e1e2a;
|
| 141 |
+
color: white;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.btn {
|
| 145 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 146 |
+
color: white;
|
| 147 |
+
border: none;
|
| 148 |
+
padding: 15px 30px;
|
| 149 |
+
border-radius: 50px;
|
| 150 |
+
font-size: 16px;
|
| 151 |
+
font-weight: 600;
|
| 152 |
+
cursor: pointer;
|
| 153 |
+
width: 100%;
|
| 154 |
+
transition: transform 0.3s;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.btn:hover:not(:disabled) {
|
| 158 |
+
transform: translateY(-2px);
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.btn:disabled {
|
| 162 |
+
opacity: 0.5;
|
| 163 |
+
cursor: not-allowed;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
.loader {
|
| 167 |
+
display: none;
|
| 168 |
+
text-align: center;
|
| 169 |
+
margin: 20px 0;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.loader::before {
|
| 173 |
+
content: "";
|
| 174 |
+
display: inline-block;
|
| 175 |
+
width: 40px;
|
| 176 |
+
height: 40px;
|
| 177 |
+
border: 4px solid #f3f3f3;
|
| 178 |
+
border-top: 4px solid #667eea;
|
| 179 |
+
border-radius: 50%;
|
| 180 |
+
animation: spin 1s linear infinite;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
@keyframes spin {
|
| 184 |
+
0% { transform: rotate(0deg); }
|
| 185 |
+
100% { transform: rotate(360deg); }
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.results {
|
| 189 |
+
display: none;
|
| 190 |
+
margin-top: 30px;
|
| 191 |
+
background: #2a2a3a;
|
| 192 |
+
padding: 20px;
|
| 193 |
+
border-radius: 15px;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
.stats {
|
| 197 |
+
display: flex;
|
| 198 |
+
gap: 20px;
|
| 199 |
+
margin-bottom: 20px;
|
| 200 |
+
flex-wrap: wrap;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
.stat-card {
|
| 204 |
+
flex: 1;
|
| 205 |
+
background: #1e1e2a;
|
| 206 |
+
padding: 20px;
|
| 207 |
+
border-radius: 10px;
|
| 208 |
+
text-align: center;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.stat-value {
|
| 212 |
+
font-size: 32px;
|
| 213 |
+
font-weight: bold;
|
| 214 |
+
color: #667eea;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
.table-wrapper {
|
| 218 |
+
overflow-x: auto;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
table {
|
| 222 |
+
width: 100%;
|
| 223 |
+
border-collapse: collapse;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
th, td {
|
| 227 |
+
padding: 12px;
|
| 228 |
+
text-align: left;
|
| 229 |
+
border-bottom: 1px solid #3a3a4a;
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
th {
|
| 233 |
+
background: #1e1e2a;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
.download-btn {
|
| 237 |
+
background: #4caf50;
|
| 238 |
+
color: white;
|
| 239 |
+
border: none;
|
| 240 |
+
padding: 12px 24px;
|
| 241 |
+
border-radius: 25px;
|
| 242 |
+
margin-top: 20px;
|
| 243 |
+
cursor: pointer;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
@media (max-width: 768px) {
|
| 247 |
+
.upload-choice {
|
| 248 |
+
flex-direction: column;
|
| 249 |
+
}
|
| 250 |
+
.stats {
|
| 251 |
+
flex-direction: column;
|
| 252 |
+
}
|
| 253 |
+
}
|