| | import pytest |
| | import pandas as pd |
| | from langchain.schema import Document |
| | from app.processor import summarize |
| |
|
@pytest.fixture
def dummy_pages():
    """Provide two one-sentence ``Document`` pages sharing a single source file.

    Each page's metadata carries the same ``source`` name and a ``page``
    number; the pages are numbered 1 and 2 in list order.
    """
    source_name = "sample_paper.pdf"
    page_texts = (
        "LangChain is a framework for developing LLM-based applications.",
        "It provides utilities for prompt management, chains, and agents.",
    )
    # enumerate(start=1) yields the 1-based page numbers stored in metadata.
    return [
        Document(
            page_content=text,
            metadata={"source": source_name, "page": page_number},
        )
        for page_number, text in enumerate(page_texts, start=1)
    ]
| |
|
def test_summarize_documents_returns_dataframe(dummy_pages):
    """Check that summarizing the fixture pages yields a well-formed DataFrame.

    The result must be a ``pandas.DataFrame`` that contains the
    ``file_name``, ``page_number``, ``chunks`` and ``concise_summary``
    columns, has exactly one row per input page, and reports page number 1
    in its first row.
    """
    # Call the name this module actually imports from ``app.processor``; the
    # previous call target, ``summarize_documents``, was never imported here
    # and failed with NameError before any assertion could run.
    # NOTE(review): assumes ``summarize`` is the callable entry point that
    # accepts the list of pages — confirm against app.processor.
    summary_df = summarize(dummy_pages)

    assert isinstance(summary_df, pd.DataFrame)

    # Check all required columns at once so a failure names every missing one.
    expected_columns = ("file_name", "page_number", "chunks", "concise_summary")
    missing_columns = [
        column for column in expected_columns if column not in summary_df.columns
    ]
    assert not missing_columns, f"missing columns: {missing_columns}"

    assert len(summary_df) == len(dummy_pages)
    assert summary_df["page_number"].iloc[0] == 1
| |
|