Spaces:

balancet1
/

task-parser

Sleeping

Artem Nikolaev

Use StaticFiles for web directory

d99c55e about 1 month ago

9.01 kB

	from fastapi import FastAPI, UploadFile, File, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse, HTMLResponse
	import tempfile
	import os
	import sys
	import base64
	import uvicorn
	import pandas as pd
	from io import BytesIO

	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

	from src.parser import TaskParser
	from src.excel_exporter import ExcelExporter
	from src.summarizer import TaskSummarizer
	from src.google_sheets import GoogleSheetsExporter
	from src.google_calendar import GoogleCalendarExporter

	import json

	# Raw Google credentials text taken from the GOOGLE_CREDENTIALS environment
	# variable; None when the variable is not set.
	GOOGLE_CREDS = os.environ.get("GOOGLE_CREDENTIALS")
	if GOOGLE_CREDS:
	    # Materialise the secret as credentials/google-credentials.json.
	    os.makedirs("credentials", exist_ok=True)
	    # Explicit UTF-8: without it the bytes on disk depend on the host locale,
	    # and non-ASCII characters in the secret could fail to encode or be
	    # written in an encoding a JSON reader does not expect.
	    with open("credentials/google-credentials.json", "w", encoding="utf-8") as f:
	        f.write(GOOGLE_CREDS)
	    print("✅ Google-ключ загружен из секретов")

	# FastAPI application instance for the task-parser service.
	app = FastAPI(title="PDF Task Parser API")

	# CORS is fully open: every origin, method and header is accepted.
	app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

	from fastapi.staticfiles import StaticFiles

	# Files from the local "web" directory are exposed under the /web URL prefix.
	app.mount("/web", StaticFiles(directory="web"), name="web")

	# "temp_uploads" is created at import time if it is missing. The name suggests
	# a scratch area for uploads; nothing in the visible code writes to it.
	TEMP_DIR = "temp_uploads"
	os.makedirs(TEMP_DIR, exist_ok=True)

	# Shared summarizer slot with three states:
	#   None  - construction has not been attempted yet
	#   False - construction was attempted and failed (it is not retried)
	#   other - the TaskSummarizer instance
	summarizer = None

	def get_summarizer():
	    """Return the shared TaskSummarizer, building it on the first call.

	    A failed construction is reported on stdout and remembered, so later
	    calls return ``None`` immediately instead of trying again.

	    Returns:
	        The TaskSummarizer instance, or ``None`` if it could not be built.
	    """
	    global summarizer
	    if summarizer is None:
	        try:
	            print("🔄 Загрузка суммаризатора...")
	            summarizer = TaskSummarizer()
	            print("✅ Суммаризатор загружен")
	        except Exception as load_error:
	            print(f"⚠️ Суммаризатор не загружен: {load_error}")
	            summarizer = False
	    if summarizer is False:
	        return None
	    return summarizer


	@app.get("/")
	async def root():
	    """Return a small JSON payload naming the API and reporting that it runs."""
	    status_payload = {
	        "message": "PDF Task Parser API",
	        "status": "running",
	    }
	    return status_payload

	@app.get("/app")
	async def get_app():
	    """Serve web/index.html as HTML, or a 404 HTML stub when the file is absent."""
	    index_path = os.path.join("web", "index.html")
	    if not os.path.exists(index_path):
	        return HTMLResponse(content="<h1>index.html not found</h1>", status_code=404)
	    with open(index_path, "r", encoding="utf-8") as page:
	        markup = page.read()
	    return HTMLResponse(content=markup)

	# Characters replaced with "_" in worksheet titles: the space plus the
	# characters Excel does not accept in a sheet name (/ \ ? * [ ] :).
	_SHEET_NAME_TRANSLATION = str.maketrans({ch: "_" for ch in " /\\?*[]:"})


	def _form_flag(form, key) -> bool:
	    """Return True when form field *key* equals "true" (any case); a missing field is False."""
	    return str(form.get(key, "false")).lower() == "true"


	def _sheet_name_for(filename) -> str:
	    """Build a worksheet title from *filename*: extension dropped, cut to 30 chars, unsafe chars -> "_"."""
	    return os.path.splitext(filename)[0][:30].translate(_SHEET_NAME_TRANSLATION)


	def _task_rows(tasks, *, with_source=False):
	    """Turn task dicts into table rows; *with_source* prepends the 'Источник' column."""
	    rows = []
	    for task in tasks:
	        row = {'Источник': task.get('source', '')} if with_source else {}
	        row.update({
	            '№': task['number'],
	            'Краткое описание': task.get('summary', ''),
	            'Описание': task['full_description'],
	            'Ответственный': task.get('responsible', ''),
	            'Срок': task.get('due_date_str', ''),
	        })
	        rows.append(row)
	    return rows


	def _attach_summaries(tasks, source_name, summarizer):
	    """Set 'source' and 'summary' on every task, falling back to a truncated description."""
	    for task in tasks:
	        task['source'] = source_name
	        if summarizer:
	            try:
	                task['summary'] = summarizer.summarize(task['full_description'])
	            except Exception:
	                # Summarisation is best-effort: use the first 100 characters instead.
	                task['summary'] = task['full_description'][:100] + "..."
	        else:
	            task['summary'] = task['full_description'][:100] + "..."


	def _parse_tasks_from_bytes(content, suffix):
	    """Write *content* to a temporary file, run TaskParser on it and return the parsed tasks."""
	    tmp_path = None
	    try:
	        # delete=False because the parser is given the path after the handle is closed.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
	            tmp_path = tmp.name
	            tmp.write(content)
	        parser = TaskParser(tmp_path)
	        return parser.parse_tasks(parser.extract_text())
	    finally:
	        # Runs on success and on every failure, so the temp file never outlives the call.
	        if tmp_path and os.path.exists(tmp_path):
	            os.remove(tmp_path)


	def _export_to_sheets(sheets_url, per_file_tables):
	    """Write one worksheet per parsed file to the spreadsheet at *sheets_url*; return a status string."""
	    try:
	        print("📊 Экспорт в Google Sheets...")
	        sheets_exporter = GoogleSheetsExporter()
	        if not sheets_exporter.use_existing_spreadsheet(sheets_url):
	            print("❌ Таблица не найдена")
	            return "error: таблица не найдена"
	        for item in per_file_tables:
	            sheets_exporter.export_dataframe(item['df'], _sheet_name_for(item['filename']))
	        print("✅ Экспорт в Google Sheets выполнен")
	        return "success"
	    except Exception as e:
	        print(f"❌ Ошибка Sheets: {e}")
	        return f"error: {str(e)}"


	def _export_to_calendar(calendar_id, tasks):
	    """Create calendar events for *tasks* in calendar *calendar_id*; return a status string."""
	    try:
	        print("📅 Экспорт в Google Calendar...")
	        print(f" ID календаря: {calendar_id}")
	        print(f" Количество задач: {len(tasks)}")

	        calendar_exporter = GoogleCalendarExporter(calendar_id=calendar_id)
	        calendar_exporter.create_events_from_tasks(tasks)
	        print("✅ Экспорт в Google Calendar выполнен")
	        return "success"
	    except Exception as e:
	        print(f"❌ Ошибка Calendar: {e}")
	        return f"error: {str(e)}"


	def _excel_workbook_base64(per_file_tables, all_tasks) -> str:
	    """Build the workbook (one sheet per file plus "Все задачи") and return it base64-encoded."""
	    exporter = ExcelExporter()
	    for item in per_file_tables:
	        exporter.add_sheet(item['df'], _sheet_name_for(item['filename']))
	    exporter.add_sheet(pd.DataFrame(_task_rows(all_tasks, with_source=True)), "Все задачи")

	    excel_buffer = BytesIO()
	    exporter.save_to_buffer(excel_buffer)
	    return base64.b64encode(excel_buffer.getvalue()).decode('ascii')


	@app.post("/parse-batch")
	async def parse_batch(request: Request):
	    """Parse tasks out of the uploaded files and return them with an Excel export.

	    Multipart form fields read from the request:
	        files: one or more uploads; entries that are not uploads (plain
	            strings, empty file inputs) are ignored.
	        export_to_sheets / export_to_calendar: "true" enables the export.
	        sheets_url: spreadsheet to export into (needed for the Sheets export).
	        calendar_id: calendar to export into (needed for the Calendar export).

	    Each upload is processed independently: a failure while parsing or
	    summarising one file is printed and that file is skipped, without
	    contributing partial data to the combined result.

	    Returns:
	        On success a dict with "success", "tasks", "statistics",
	        "excel_base64", "files", "sheets_export" and "calendar_export".
	        When no file yields any task, a JSONResponse with "success": False
	        and an "error" message.
	    """
	    print("\n" + "="*60)
	    print("🔍 ПОЛУЧЕН ЗАПРОС НА ПАРСИНГ")
	    print("="*60)

	    form = await request.form()

	    # Keep only real uploads; a text value or an empty file input has no usable filename.
	    files = [item for item in form.getlist("files") if getattr(item, "filename", None)]
	    export_to_sheets = _form_flag(form, "export_to_sheets")
	    export_to_calendar = _form_flag(form, "export_to_calendar")
	    sheets_url = form.get("sheets_url", "")
	    calendar_id = form.get("calendar_id", "")

	    print(f"📄 Файлов: {len(files)}")
	    print(f"📊 Экспорт в Sheets: {export_to_sheets}")
	    print(f"📅 Экспорт в Calendar: {export_to_calendar}")
	    print(f"🔗 URL Sheets: {sheets_url}")
	    print(f"📆 ID Calendar: {calendar_id}")
	    print("="*60 + "\n")

	    all_results = []      # per-file {"filename", "count"} for the response
	    all_tasks_data = []   # every task from every file, in upload order
	    all_dfs = []          # per-file {"df", "filename"} used by both exports
	    sheets_export_status = None
	    calendar_export_status = None

	    for file in files:
	        file_ext = os.path.splitext(file.filename)[1].lower()
	        # Read before any temp file exists, so a failed read leaves nothing behind.
	        content = await file.read()

	        try:
	            tasks = _parse_tasks_from_bytes(content, file_ext)

	            # NOTE(review): the original nesting was lost in SOURCE; this assumes
	            # per-file results are recorded only for files that yielded tasks - confirm.
	            if tasks:
	                _attach_summaries(tasks, file.filename, get_summarizer())
	                df = pd.DataFrame(_task_rows(tasks))
	                # Commit only once the whole file has been processed, so a
	                # failure above cannot leave half a file in the aggregate.
	                all_tasks_data.extend(tasks)
	                all_dfs.append({"df": df, "filename": file.filename})
	                all_results.append({"filename": file.filename, "count": len(tasks)})
	        except Exception as e:
	            print(f"Ошибка в {file.filename}: {e}")

	    if not all_tasks_data:
	        return JSONResponse({
	            "success": False,
	            "error": "Задачи не найдены ни в одном файле"
	        })

	    # ===== EXPORT TO GOOGLE SHEETS =====
	    if export_to_sheets and sheets_url:
	        sheets_export_status = _export_to_sheets(sheets_url, all_dfs)

	    # ===== EXPORT TO GOOGLE CALENDAR =====
	    if export_to_calendar and calendar_id:
	        calendar_export_status = _export_to_calendar(calendar_id, all_tasks_data)
	    else:
	        print(f"⚠️ Экспорт в Calendar пропущен: export_to_calendar={export_to_calendar}, calendar_id={calendar_id}")

	    # ===== BUILD THE EXCEL WORKBOOK =====
	    excel_base64 = _excel_workbook_base64(all_dfs, all_tasks_data)

	    total_stats = {
	        "total": len(all_tasks_data),
	        "with_responsible": sum(1 for t in all_tasks_data if t.get('responsible')),
	        "with_date": sum(1 for t in all_tasks_data if t.get('due_date_str')),
	        "files_count": len(all_results)
	    }

	    return {
	        "success": True,
	        "tasks": all_tasks_data,
	        "statistics": total_stats,
	        "excel_base64": excel_base64,
	        "files": [{"name": r["filename"], "count": r["count"]} for r in all_results],
	        "sheets_export": sheets_export_status,
	        "calendar_export": calendar_export_status
	    }


	if __name__ == "__main__":
	    # Direct execution: announce startup, then serve the app on every
	    # interface at port 8000.
	    print("🚀 Запуск PDF Task Parser API")
	    server_options = {"host": "0.0.0.0", "port": 8000}
	    uvicorn.run(app, **server_options)