ManojParvatham committed on
Commit ab9ff53 · verified
1 Parent(s): 00eee04

Upload 5 files

agent_for_unit4/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .agent import manager_agent, prepare_for_input
+
+ __all__ = ["manager_agent", "prepare_for_input"]
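Note that importing this package constructs the agents and tools immediately, so the relevant credentials need to be in the environment first. A minimal sketch, assuming OpenRouter is used for the LLM calls and Langfuse tracing is optional; the placeholder values are not real keys:

import os

# Read by VisualQATool in tools.py and (via LiteLLM) by the openrouter/* models in agent.py.
os.environ["OPENROUTER_API_KEY"] = "sk-or-..."
# Optional: enables Langfuse tracing through configure_open_telemetry() in agent.py.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."

from agent_for_unit4 import manager_agent, prepare_for_input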
agent_for_unit4/agent.py ADDED
@@ -0,0 +1,113 @@
+ import base64
+ import os
+ from pathlib import Path
+ from textwrap import dedent
+
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, VisitWebpageTool
+
+ from .tools import RetrieveCSVStorageTool, SpeechRecognitionTool, VisualQATool, WikiTool, fetch_text_content, read_excel
+
+
+ def configure_open_telemetry() -> None:
+     try:
+         from openinference.instrumentation.smolagents import SmolagentsInstrumentor
+         from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+         from opentelemetry.sdk.trace import TracerProvider
+         from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+     except ImportError:
+         print("OpenTelemetry packages are not installed. Please install them to enable tracing.")
+         return None
+
+     try:
+         langfuse_public_key = os.environ["LANGFUSE_PUBLIC_KEY"]
+         langfuse_secret_key = os.environ["LANGFUSE_SECRET_KEY"]
+     except KeyError:
+         print("LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY must be set in the environment variables.")
+         return None
+
+     LANGFUSE_AUTH = base64.b64encode(f"{langfuse_public_key}:{langfuse_secret_key}".encode()).decode()
+     os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://cloud.langfuse.com/api/public/otel"
+     os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"
+
+     trace_provider = TracerProvider()
+     trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
+
+     SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
+
+
+ configure_open_telemetry()
+
+ wiki_storage_tool = RetrieveCSVStorageTool(
+     table_name="wiki",
+     init_storage=True,
+     storage_path="./storage",
+ )
+
+ wiki_agent = CodeAgent(
+     name="wiki_agent",
+     description="""A wiki agent that can search and retrieve information from Wikipedia.
+     It is specialized for handling Wikipedia articles, and is recommended over web_agent for retrieving information from Wikipedia.""",
+     model=LiteLLMModel(model_id="openrouter/qwen/qwen-2.5-coder-32b-instruct"),
+     tools=[
+         DuckDuckGoSearchTool(),
+         wiki_storage_tool,
+         WikiTool(storage=wiki_storage_tool.get_storage()),
+     ],
+     max_steps=10,
+     additional_authorized_imports=["pandas"],
+ )
+
+
+ web_agent = CodeAgent(
+     name="web_agent",
+     description="A web agent that can search and visit webpages.",
+     model=LiteLLMModel(model_id="openrouter/qwen/qwen-2.5-coder-32b-instruct"),
+     tools=[
+         DuckDuckGoSearchTool(max_results=10),
+         VisitWebpageTool(),
+     ],
+     verbosity_level=2,
+     max_steps=10,
+ )
+
+
+ manager_agent = CodeAgent(
+     name="manager_agent",
+     model=LiteLLMModel(
+         model_id="openrouter/qwen/qwq-32b",
+     ),
+     tools=[
+         fetch_text_content,  # fetch text content from a URL
+         SpeechRecognitionTool(),  # Audio to text
+         VisualQATool(),  # Visual Question Answering
+         read_excel,  # Read Excel files
+     ],
+     managed_agents=[
+         wiki_agent,
+         web_agent,
+     ],
+     additional_authorized_imports=["pandas", "requests"],
+     planning_interval=5,
+     verbosity_level=2,
+     max_steps=15,
+ )
+
+
+ def parse_file_name(file_base_url: str, file_name: str) -> str:
+     if file_name == "":
+         return "not provided"
+     return file_base_url + Path(file_name).stem
+
+
+ def prepare_for_input(question: dict, file_base_url: str) -> str:
+     input_text = dedent(f"""\
+         Question:
+         {question["question"]}
+
+         If necessary, use the following file (it may not be provided):
+         file_type: {Path(question["file_name"]).suffix}
+         file: {parse_file_name(file_base_url, question["file_name"])}
+
+         Video analysis tools are currently unavailable.
+         If the question requires analyzing a video (e.g. questions about a YouTube link or an mp4 file), answer 'No Answer'.""")
+     return input_text
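For context, a rough sketch of how prepare_for_input and manager_agent might be combined in a driver loop. The question dict shape follows the keys used above ("question", "file_name"); the question text and the files URL are placeholders, not part of this commit:

from agent_for_unit4 import manager_agent, prepare_for_input

# Hypothetical inputs; in practice these would come from the evaluation API.
file_base_url = "https://example.com/files/"
question = {"question": "What is the capital of France?", "file_name": ""}

prompt = prepare_for_input(question, file_base_url)
answer = manager_agent.run(prompt)
print(answer)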
agent_for_unit4/db.py ADDED
@@ -0,0 +1,49 @@
+ import shelve
+ import shutil
+ from pathlib import Path
+ from typing import Any, Generic, TypeVar
+
+ T = TypeVar("T")
+
+
+ class ShelveDB(Generic[T]):
+     dir_path: Path
+
+     def __init__(self, db_name: str, init: bool) -> None:
+         self.db_path = self.dir_path / db_name
+
+         if init:
+             self.dir_path.mkdir(parents=True, exist_ok=True)
+             for file_path in self.dir_path.glob(f"{db_name}*"):
+                 if file_path.is_file():
+                     file_path.unlink()
+                 elif file_path.is_dir():
+                     shutil.rmtree(file_path)
+
+     @classmethod
+     def from_table(cls, table: str) -> "ShelveDB":
+         return cls(table, False)
+
+     def save(self, key: str, value: Any) -> None:
+         with shelve.open(str(self.db_path)) as db:
+             db[key] = value
+
+     def fetch(self, key: str) -> T | None:
+         with shelve.open(str(self.db_path)) as db:
+             return db.get(key, None)
+
+     def delete(self, key: str) -> bool:
+         with shelve.open(str(self.db_path)) as db:
+             if key in db:
+                 del db[key]
+                 return True
+         return False
+
+     def clear(self) -> None:
+         with shelve.open(str(self.db_path)) as db:
+             for key in list(db.keys()):
+                 del db[key]
+
+     def list_keys(self) -> list[str]:
+         with shelve.open(str(self.db_path)) as db:
+             return list(db.keys())
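A short usage sketch of the shelve wrapper on its own. The directory and table name are illustrative and mirror how RetrieveCSVStorageTool configures it:

from pathlib import Path
from agent_for_unit4.db import ShelveDB

ShelveDB.dir_path = Path("./storage")      # class-level location, set before instantiation
db = ShelveDB[dict]("example", init=True)  # init=True wipes any previous files for this table
db.save("greeting", {"text": "hello"})
print(db.fetch("greeting"))                # {'text': 'hello'}
print(db.list_keys())                      # ['greeting']
db.delete("greeting")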
agent_for_unit4/tools.py ADDED
@@ -0,0 +1,205 @@
+ import os
+ from io import BytesIO
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+ import requests
+ from huggingface_hub import InferenceClient
+ from smolagents import Tool, tool
+
+ from .db import ShelveDB
+ from .wiki import get_wiki_content
+
+
+ ### convert table to markdown
+ @tool
+ def convert_pandas_table_to_markdown(table: pd.DataFrame) -> str:
+     """
+     Converts a pandas DataFrame to a markdown table.
+
+     Args:
+         table (pd.DataFrame): The DataFrame to convert.
+
+     Returns:
+         str: The markdown representation of the table.
+     """
+     return str(table.to_markdown())
+
+
+ ### fetch text tool
+ @tool
+ def fetch_text_content(url: str) -> str:
+     """
+     Fetches the text content from a given URL.
+
+     Args:
+         url (str): The URL to fetch the text from.
+
+     Returns:
+         str: The text content of the page.
+     """
+     try:
+         response = requests.get(url, timeout=30)
+         response.raise_for_status()  # Raise an error for bad responses
+         return response.text
+     except requests.RequestException as e:
+         return f"Error fetching URL: {e}"
+
+
+ ### Storage Tool
+ class RetrieveCSVStorageTool(Tool):
+     name = "retrieve_csv_storage_tool"
+     description = "Retrieves a CSV file from the storage and returns it as a pandas DataFrame."
+     inputs = {
+         "key": {
+             "type": "string",
+             "description": "The key to retrieve data from the table.",
+         },
+     }
+     output_type = "any"
+
+     def __init__(self, table_name: str, init_storage: bool, storage_path: str | None = None, *args: Any, **kwargs: Any):
+         super().__init__(*args, **kwargs)
+         if storage_path is not None:
+             ShelveDB.dir_path = Path(storage_path)
+         self.storage = ShelveDB[pd.DataFrame](table_name, init=init_storage)
+
+     def get_storage(self) -> ShelveDB[pd.DataFrame]:
+         return self.storage
+
+     def forward(self, key: str) -> pd.DataFrame:
+         try:
+             # Retrieve the CSV file from storage
+             dataframe = self.storage.fetch(key)
+         except Exception as e:
+             return f"Error retrieving data: {e}"
+         else:
+             if dataframe is None:
+                 raise ValueError(f"No data found for key: {key}")
+             return dataframe
+
+
+ ### Wikipedia Content Extraction Tool
+
+
+ class WikiTool(Tool):
+     name = "wiki_tool"
+     description = """Get Wikipedia page content and tables.
+     Returns a tuple containing the page content and a dictionary of tables extracted from the page.
+     Tables in the page content are replaced with their storage keys ({{table_1}}, {{table_2}}, ...).
+     To understand what is contained in the tables, it is recommended to first display the content.
+     Example 1:
+         content, tables = wiki_tool("Python_(programming_language)", "en")
+         print(content)
+
+     The retrieved table objects are stored in storage.
+     They can be retrieved using "retrieve_csv_storage_tool".
+     Example 2:
+         table: pd.DataFrame = retrieve_csv_storage_tool("table_1")
+     """
+     inputs = {
+         "query": {
+             "type": "string",
+             "description": "The title of the Wikipedia page to visit. For example, 'Python_(programming_language)'.",
+         },
+         "language": {
+             "type": "string",
+             "description": "The language of the Wikipedia page. For example, 'en' for English, 'ja' for Japanese.",
+         },
+     }
+     output_type = "array"
+
+     def __init__(self, storage: ShelveDB[Any], *args: Any, **kwargs: Any) -> None:
+         super().__init__(*args, **kwargs)
+         self.storage = storage
+
+     def forward(self, query: str, language: str) -> tuple[str, dict[str, pd.DataFrame]]:
+         content, tables = get_wiki_content(query, language)
+         self.storage.clear()
+         for table_key, df in tables.items():
+             self.storage.save(table_key, df)
+         return content, tables
+
+
+ ### Visual Question Answering Tool
+
+
+ def request_visual_qa(client: InferenceClient, question: str, image_url: str) -> str:
+     contents = [{"type": "text", "text": question}, {"type": "image_url", "image_url": {"url": image_url}}]
+     res = client.chat_completion(messages=[{"role": "user", "content": contents}], model="qwen/qwen2.5-vl-32b-instruct")
+     content = res.choices[0].message.content
+     if content is None:
+         raise ValueError("No content returned from the model.")
+     return content
+
+
+ class VisualQATool(Tool):
+     name = "visual_qa_tool"
+     description = "A tool that can answer questions about an image."
+     inputs = {
+         "image_url": {
+             "type": "string",
+             "description": "The URL of the image to analyze. No extension needed.",
+         },
+         "question": {
+             "type": "string",
+             "description": "The question to ask about the image.",
+         },
+     }
+     output_type = "string"
+     client = InferenceClient(
+         base_url="https://openrouter.ai/api/v1",
+         api_key=os.environ["OPENROUTER_API_KEY"],
+     )
+
+     def forward(self, image_url: str, question: str) -> str:
+         try:
+             answer = request_visual_qa(self.client, question, image_url)
+         except Exception as e:
+             return f"Error: {str(e)}"
+         else:
+             return answer
+
+
+ ### Speech Recognition Tool
+
+
+ def request_speech_recognition(client: InferenceClient, audio_file: str, model: str = "openai/whisper-large-v3") -> str:
+     output = client.automatic_speech_recognition(audio_file, model=model)
+     return output.text
+
+
+ class SpeechRecognitionTool(Tool):
+     name = "speech_recognition"
+     description = "Converts audio contents to text"
+     inputs = {"audio_url": {"type": "string", "description": "URL of the audio file to transcribe. No extension needed."}}
+     output_type = "string"
+     client = InferenceClient(provider="fal-ai")
+     _model = "openai/whisper-large-v3"
+
+     def forward(self, audio_url: str) -> str:
+         try:
+             transcription = request_speech_recognition(self.client, audio_url, model=self._model)
+         except Exception as e:
+             return f"Error: {str(e)}"
+         else:
+             return transcription
+
+
+ ### Excel Tool
+ @tool
+ def read_excel(file_url: str) -> pd.DataFrame:
+     """
+     Reads an Excel file from a given URL and returns the data as a DataFrame.
+
+     Args:
+         file_url (str): URL of the Excel file to read. No extension needed.
+     Returns:
+         pd.DataFrame: DataFrame containing the data from the first sheet of the Excel file.
+     """
+     res = requests.get(file_url, timeout=30)
+     res.raise_for_status()
+     excel_data = BytesIO(res.content)
+     df = pd.read_excel(excel_data)
+     return df
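To see the wiki/storage round trip these two tools implement, a small sketch calling them directly through forward() rather than from inside an agent run; the page title is just an example:

from agent_for_unit4.tools import RetrieveCSVStorageTool, WikiTool

storage_tool = RetrieveCSVStorageTool(table_name="wiki", init_storage=True, storage_path="./storage")
wiki_tool = WikiTool(storage=storage_tool.get_storage())

content, tables = wiki_tool.forward("Python_(programming_language)", "en")
print(content[:300])                  # prose with {{table_1}}, {{table_2}}, ... placeholders
df = storage_tool.forward("table_1")  # the same table, read back from shelve storage
print(df.head())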
agent_for_unit4/wiki.py ADDED
@@ -0,0 +1,180 @@
+ import re
+ from io import StringIO
+ from typing import Any
+
+ import pandas as pd
+ import requests
+ from bs4 import BeautifulSoup
+
+
+ def process_list_element(list_element: Any, indent: int = 0) -> str:
+     """Recursively convert an HTML list element into indented plain text."""
+     result = []
+
+     is_ordered = list_element.name == "ol"
+
+     for i, li in enumerate(list_element.find_all("li", recursive=False)):
+         # Get the text of the list item,
+         # excluding any nested lists
+         item_text = ""
+         for content in li.contents:
+             if content.name not in ["ul", "ol"]:
+                 item_text += str(content)
+
+         item_text = BeautifulSoup(item_text, "html.parser").get_text().strip()
+
+         # Use numbers for ordered lists, bullets otherwise
+         prefix = " " * indent + (f"{i + 1}. " if is_ordered else "* ")
+         if item_text:
+             result.append(prefix + item_text)
+
+         # Process nested lists
+         for nested_list in li.find_all(["ul", "ol"], recursive=False):
+             nested_content = process_list_element(nested_list, indent + 1)
+             if nested_content:
+                 result.append(nested_content)
+
+     return "\n".join(result)
+
+
+ def get_wiki_content(title: str, language: str = "en") -> tuple[str, dict[str, pd.DataFrame]]:
+     """
+     Get Wikipedia page content and tables.
+
+     Returns:
+         A tuple containing the page content as a string and a dictionary of tables
+         extracted from the page. The keys of the dictionary are "table_1", "table_2", etc.
+         and the values are pandas DataFrames representing the tables.
+
+     Example:
+         content, tables = get_wiki_content("Python_(programming_language)")
+         print(content)
+         print(tables["table_1"])  # Access the first table
+
+     Args:
+         title: wikipedia page title (e.g., "Python_(programming_language)")
+         language: wikipedia language (e.g., "en" for English, "ja" for Japanese)
+     """
+     # Build the parse API URL
+     api_url = f"https://{language}.wikipedia.org/w/api.php"
+
+     # API parameters
+     params = {
+         "action": "parse",
+         "page": title,
+         "format": "json",
+         "prop": "text",
+         "disabletoc": True,
+     }
+
+     # Send the request
+     response = requests.get(api_url, params=params, timeout=30)  # type: ignore
+
+     # Check the response
+     if response.status_code != 200:
+         raise Exception(f"api error: {response.status_code} - {response.text}")
+
+     # Parse the JSON response
+     data = response.json()
+
+     # Check for errors
+     if "error" in data:
+         raise Exception(f"api error: {data['error']['info']}")
+
+     if "parse" not in data:
+         raise Exception("api error: No parse data found")
+
+     # Get the HTML content
+     html_content = data["parse"]["text"]["*"]
+
+     # Parse the HTML (one copy for table extraction, one for text extraction)
+     soup = BeautifulSoup(html_content, "html.parser")
+     content_soup = BeautifulSoup(html_content, "html.parser")
+
+     # Collect table information
+     tables_dict: dict[str, pd.DataFrame] = {}
+     table_ids: list[tuple[str, str]] = []  # list of (table_id, table_html)
+
+     # Target tables: infoboxes and wikitables
+     table_index = 1
+
+     # First, process infoboxes (biography tables)
+     infoboxes = soup.find_all("table", class_=lambda c: c and "infobox" in c)
+     for i, table in enumerate(infoboxes):
+         table_id = f"table_{table_index}"
+         table_ids.append((table_id, str(table)))
+         table_index += 1
+
+     # Next, process wikitables
+     wikitables = soup.find_all("table", class_="wikitable")
+     for i, table in enumerate(wikitables):
+         table_id = f"table_{table_index}"
+         table_ids.append((table_id, str(table)))
+         table_index += 1
+
+     # Parse the extracted tables with pandas
+     for table_id, table_html in table_ids:
+         try:
+             dfs = pd.read_html(StringIO(table_html))
+             if dfs:
+                 tables_dict[table_id] = dfs[0]
+         except Exception:
+             # Skip tables that fail to parse
+             continue
+
+     # Replace tables in the content with placeholders
+     table_placeholders: dict[str, str] = {}
+
+     # Handle infoboxes
+     for i, table in enumerate(content_soup.find_all("table", class_=lambda c: c and "infobox" in c)):
+         table_id = f"table_{i + 1}"
+         if table_id in tables_dict:
+             placeholder = f"{{{{{table_id}}}}}"
+             table_placeholders[table_id] = placeholder
+             table_placeholder_tag = content_soup.new_tag("p")
+             table_placeholder_tag.string = placeholder
+             table.replace_with(table_placeholder_tag)
+
+     # Handle wikitables (indices continue after the infoboxes)
+     wikitable_start_index = len(infoboxes) + 1
+     for i, table in enumerate(content_soup.find_all("table", class_="wikitable")):
+         table_id = f"table_{wikitable_start_index + i}"
+         if table_id in tables_dict:
+             placeholder = f"{{{{{table_id}}}}}"
+             table_placeholders[table_id] = placeholder
+             table_placeholder_tag = content_soup.new_tag("p")
+             table_placeholder_tag.string = placeholder
+             table.replace_with(table_placeholder_tag)
+
+     # Strip reference markers, hatnotes, navboxes and edit links (CSS selectors are needed here; find_all would treat "div.hatnote" as a literal tag name)
+     for element in content_soup.select("sup, div.hatnote, div.navbox, span.mw-editsection"):
+         element.decompose()
+
+     # Collect headings, paragraphs and lists
+     elements = content_soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6", "p", "ul", "ol"])
+     text_content = []
+
+     for element in elements:
+         if element.name and element.name.startswith("h"):  # type: ignore
+             level = int(element.name[1])  # type: ignore
+             heading_text = element.get_text().strip()
+             if heading_text:  # skip empty headings
+                 text_content.append("\n" + "#" * level + " " + heading_text)
+         elif element.name == "p":  # type: ignore
+             paragraph_text = element.get_text().strip()
+             if paragraph_text:  # skip empty paragraphs
+                 # Table placeholders are appended as-is, like any other paragraph
+                 if re.match(r"^\{\{table_\d+\}\}$", paragraph_text):
+                     text_content.append(paragraph_text)
+                 else:
+                     text_content.append(paragraph_text)
+         elif element.name in ["ul", "ol"] and element.parent.name not in ["li", "ul", "ol"]:  # type: ignore
+             # Only process top-level lists (nested ones are handled via their parent li)
+             list_content = process_list_element(element)
+             if list_content:
+                 text_content.append(list_content)
+
+     # Join the text content
+     content = "\n\n".join(text_content)
+
+     return content, tables_dict
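To illustrate the list flattening in isolation, a tiny self-contained check of process_list_element on a nested list; the HTML snippet is made up:

from bs4 import BeautifulSoup
from agent_for_unit4.wiki import process_list_element

html = "<ul><li>alpha<ol><li>one</li><li>two</li></ol></li><li>beta</li></ul>"
top_level_list = BeautifulSoup(html, "html.parser").find("ul")
print(process_list_element(top_level_list))
# * alpha
#   1. one
#   2. two
# * beta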