ABAO77 committed on
Commit
172064c
·
verified ·
1 Parent(s): 71d6aa3

Upload 60 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +29 -0
  2. app.py +13 -0
  3. requirements.txt +29 -0
  4. src/.DS_Store +0 -0
  5. src/agents/agent_transcript/__pycache__/flow.cpython-311.pyc +0 -0
  6. src/agents/agent_transcript/__pycache__/func.cpython-311.pyc +0 -0
  7. src/agents/agent_transcript/__pycache__/prompt.cpython-311.pyc +0 -0
  8. src/agents/agent_transcript/__pycache__/tools.cpython-311.pyc +0 -0
  9. src/agents/agent_transcript/flow.py +74 -0
  10. src/agents/agent_transcript/func.py +182 -0
  11. src/agents/agent_transcript/prompt.py +182 -0
  12. src/agents/agent_transcript/tools.py +54 -0
  13. src/agents/base/flow.py +23 -0
  14. src/agents/base/func.py +4 -0
  15. src/apis/.DS_Store +0 -0
  16. src/apis/__pycache__/create_app.cpython-311.pyc +0 -0
  17. src/apis/controllers/__pycache__/category_controller.cpython-311.pyc +0 -0
  18. src/apis/controllers/__pycache__/order_controller.cpython-311.pyc +0 -0
  19. src/apis/controllers/__pycache__/service_controller.cpython-311.pyc +0 -0
  20. src/apis/controllers/__pycache__/user_controller.cpython-311.pyc +0 -0
  21. src/apis/controllers/__pycache__/user_service_controller.cpython-311.pyc +0 -0
  22. src/apis/create_app.py +23 -0
  23. src/apis/interfaces/__pycache__/auth_interface.cpython-311.pyc +0 -0
  24. src/apis/interfaces/__pycache__/chat_interface.cpython-311.pyc +0 -0
  25. src/apis/interfaces/__pycache__/file_processing_interface.cpython-311.pyc +0 -0
  26. src/apis/middlewares/__pycache__/auth_middleware.cpython-311.pyc +0 -0
  27. src/apis/middlewares/auth_middleware.py +40 -0
  28. src/apis/models/BaseDocument.py +17 -0
  29. src/apis/models/__pycache__/BaseDocument.cpython-311.pyc +0 -0
  30. src/apis/models/__pycache__/bot_models.cpython-311.pyc +0 -0
  31. src/apis/models/__pycache__/category_models.cpython-311.pyc +0 -0
  32. src/apis/models/__pycache__/grade_models.cpython-311.pyc +0 -0
  33. src/apis/models/__pycache__/order_models.cpython-311.pyc +0 -0
  34. src/apis/models/__pycache__/service_model.cpython-311.pyc +0 -0
  35. src/apis/models/__pycache__/service_provide.cpython-311.pyc +0 -0
  36. src/apis/models/__pycache__/user_models.cpython-311.pyc +0 -0
  37. src/apis/providers/__pycache__/jwt_provider.cpython-311.pyc +0 -0
  38. src/apis/routers/__pycache__/api_testing_router.cpython-311.pyc +0 -0
  39. src/apis/routers/__pycache__/auth_router.cpython-311.pyc +0 -0
  40. src/apis/routers/__pycache__/custom_chatbot_router.cpython-311.pyc +0 -0
  41. src/apis/routers/__pycache__/file_processing_router.cpython-311.pyc +0 -0
  42. src/apis/routers/__pycache__/gen_script.cpython-311.pyc +0 -0
  43. src/apis/routers/__pycache__/grade_code_router.cpython-311.pyc +0 -0
  44. src/apis/routers/__pycache__/graded_assignment_router.cpython-311.pyc +0 -0
  45. src/apis/routers/__pycache__/image_generation.cpython-311.pyc +0 -0
  46. src/apis/routers/__pycache__/rag_agent_template.cpython-311.pyc +0 -0
  47. src/apis/routers/__pycache__/vector_store_router.cpython-311.pyc +0 -0
  48. src/apis/routers/gen_script.py +158 -0
  49. src/config/__pycache__/cloudinary.cpython-311.pyc +0 -0
  50. src/config/__pycache__/constants.cpython-311.pyc +0 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.11 base keeps the image small; native build deps added below.
FROM python:3.11-slim

# gcc: build native wheels; libglib/libsm/libxext/libxrender: shared libraries
# commonly needed by imaging/CV wheels; git: allows pip installs from git refs.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libglib2.0-0 libsm6 libxext6 libxrender-dev \
    git \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user (uid 1000); pip --user installs land on PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /app

# 1. Copy requirements first so the dependency layer caches across code changes.
COPY --chown=user ./requirements.txt requirements.txt

# 2. Upgrade pip and install the main dependencies.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade -r requirements.txt

# # 3. Install the CPU-only torch build (must go first so other packages
# #    detect it instead of pulling the CUDA build).
# RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu

# # 4. Install langchain_docling (no longer bundles torch).
# RUN pip install --no-cache-dir langchain_docling

# 5. Copy the application code into the image.
COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dotenv import load_dotenv

# Load .env BEFORE importing anything from src — modules there read
# environment variables (API keys, DB URIs) at import time, so this
# statement order is deliberate and must be preserved.
load_dotenv(override=True)

from src.apis.create_app import create_app, api_router
import uvicorn


app = create_app()

app.include_router(api_router)
if __name__ == "__main__":
    # Local dev entry point; in Docker the image CMD runs uvicorn on port 7860.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE: duplicate "pymupdf" entry removed — pip normalizes package names
# case-insensitively, so it was the same distribution as PyMuPDF below.
langgraph
langchain
langchain-community
python-dotenv
fastapi
motor
langchain-google-genai
langchain-openai
langchain-pinecone
PyMuPDF
pytz
uvicorn[standard]
python-multipart
langchain_experimental
duckduckgo-search
pydantic[email]
python_jose==3.3.0
pillow
python-docx
langchain-text-splitters
PyJWT==2.8.0
docx2txt
gitpython
tiktoken
google-genai
langchain-core
youtube-transcript-api
youtube-comment-downloader
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/agents/agent_transcript/__pycache__/flow.cpython-311.pyc ADDED
Binary file (3.98 kB). View file
 
src/agents/agent_transcript/__pycache__/func.cpython-311.pyc ADDED
Binary file (8.91 kB). View file
 
src/agents/agent_transcript/__pycache__/prompt.cpython-311.pyc ADDED
Binary file (6.42 kB). View file
 
src/agents/agent_transcript/__pycache__/tools.cpython-311.pyc ADDED
Binary file (3.09 kB). View file
 
src/agents/agent_transcript/flow.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, START, END
2
+ from .func import (
3
+ State,
4
+ trim_history,
5
+ extract_transcript_and_comment,
6
+ script_structure_analyzer,
7
+ comment_insight_extractor,
8
+ scientific_fact_finder,
9
+ script_re_outline,
10
+ script_writer_init,
11
+ script_writer_single,
12
+ should_continue_writing,
13
+ script_writer_end,
14
+ )
15
+ from langgraph.graph.state import CompiledStateGraph
16
+
17
+
18
class AgentTranscript:
    """Builds and compiles the transcript-to-script LangGraph workflow.

    The graph is a linear analysis pipeline (trim history -> fetch
    transcript/comments -> structure analysis -> comment insights ->
    fact finding -> re-outline) followed by a conditional writing loop
    that emits one script chunk per iteration until the target count
    is reached.
    """

    def __init__(self):
        self.builder = StateGraph(State)

    def node(self):
        """Register every workflow step with the graph builder."""
        steps = {
            "trim_history": trim_history,
            "extract_transcript_and_comment": extract_transcript_and_comment,
            "script_structure_analyzer": script_structure_analyzer,
            "comment_insight_extractor": comment_insight_extractor,
            "scientific_fact_finder": scientific_fact_finder,
            "script_re_outline": script_re_outline,
            "script_writer_init": script_writer_init,
            "script_writer_single": script_writer_single,
            "script_writer_end": script_writer_end,
        }
        for name, fn in steps.items():
            self.builder.add_node(name, fn)

    def edge(self):
        """Wire the linear pipeline, then the conditional writing loop."""
        pipeline = [
            START,
            "trim_history",
            "extract_transcript_and_comment",
            "script_structure_analyzer",
            "comment_insight_extractor",
            "scientific_fact_finder",
            "script_re_outline",
            "script_writer_init",
        ]
        for src, dst in zip(pipeline, pipeline[1:]):
            self.builder.add_edge(src, dst)

        # Both the init node and each written chunk route through the same
        # predicate: keep emitting single scripts until the count is reached.
        routes = {
            "script_writer_single": "script_writer_single",
            "script_writer_end": "script_writer_end",
        }
        for source in ("script_writer_init", "script_writer_single"):
            self.builder.add_conditional_edges(
                source, should_continue_writing, routes
            )

        self.builder.add_edge("script_writer_end", END)

    def __call__(self) -> CompiledStateGraph:
        """Register nodes and edges, then return the compiled graph."""
        self.node()
        self.edge()

        return self.builder.compile()
72


# Module-level singleton: the graph is built and compiled once at import time.
script_writer_agent = AgentTranscript()()
src/agents/agent_transcript/func.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypedDict, Optional, List
2
+ from langchain_core.messages import AnyMessage, ToolMessage, HumanMessage, AIMessage
3
+ from langgraph.graph.message import add_messages
4
+ from typing import Sequence, Annotated
5
+ from langchain_core.messages import RemoveMessage
6
+ from langchain_core.documents import Document
7
+ from src.config.llm import get_llm
8
+ from src.utils.logger import logger
9
+ from src.utils.helper import extract_transcript, extract_comment
10
+ from .prompt import *
11
+ import operator
12
+
13
+
14
class State(TypedDict):
    """Shared LangGraph state for the transcript-to-script workflow."""

    video_link: str  # YouTube URL the transcript/comments are pulled from
    messages: Annotated[Sequence[AnyMessage], add_messages]  # merged chat history
    transcript: str  # raw transcript text extracted from the video
    comment: str  # raw comment text extracted from the video
    script_structure_analyzer_response: str  # outline/formula analysis output
    comment_insight_extractor_response: str  # audience-insight analysis output
    research_insight_response: str  # scientific-fact-finder agent output
    script_re_outline_response: str  # regenerated outline output
    script_writer_response: List[str]  # accumulated generated script chunks
    target_word_count: int  # total words requested across all chunks
    script_count: int  # number of chunks planned from target_word_count
    current_script_index: int  # next chunk index in the writing loop
27
+
28
+
29
def trim_history(state: State):
    """Keep only the 20 most recent messages in the conversation history.

    Returns a partial state update containing ``RemoveMessage`` markers
    (interpreted by the ``add_messages`` reducer as deletions) for each
    message beyond the 20-message window, or an empty update when the
    history is already short enough.
    """
    history = state.get("messages", [])

    if len(history) <= 20:
        return {}

    surplus = len(history) - 20
    # Bug fix: the original also returned "selected_ids" and
    # "selected_documents", which are not declared in State (apparent
    # copy-paste leftover from another agent) — LangGraph rejects updates
    # to undeclared state channels, so those keys are removed.
    return {
        "messages": [RemoveMessage(id=msg.id) for msg in history[:surplus]],
    }
44
+
45
+
46
def extract_transcript_and_comment(state: State):
    """Fetch the video transcript and comments, and size the writing loop."""
    transcript = extract_transcript(state["video_link"])
    comment = extract_comment(state["video_link"])

    # Calculate script count based on target word count.
    # Each generated chunk is budgeted at ~1000 words (the original comment
    # said "200-300 words", which contradicted the value below — corrected).
    avg_words_per_script = 1000
    script_count = max(1, state.get("target_word_count", 8000) // avg_words_per_script)

    return {
        "transcript": transcript,
        "comment": comment,
        "script_count": script_count,
        "messages": HumanMessage(
            content=f"Will generate {script_count} scripts for {state.get('target_word_count', 8000)} words target"
        ),
    }
63
+
64
+
65
def script_structure_analyzer(state: State):
    """Analyze the original transcript's structure (outline, formula, hook, CTA)."""
    analysis = chain_script_structure_analyzer.invoke({"script": state["transcript"]})
    summary = "Script Structure Analyzer Response: " + analysis.content
    return {
        "script_structure_analyzer_response": analysis.content,
        "messages": HumanMessage(content=summary),
    }
74
+
75
+
76
def comment_insight_extractor(state: State):
    """Mine audience insights from the comments, cross-checked against the
    structure analysis produced by the previous node."""
    payload = {
        "comment": state["comment"],
        "script_structure_analyzer_response": state[
            "script_structure_analyzer_response"
        ],
    }
    insights = chain_comment_insight_extractor.invoke(payload)
    note = "Comment Insight Extractor Response: " + insights.content
    return {
        "comment_insight_extractor_response": insights.content,
        "messages": HumanMessage(content=note),
    }
91
+
92
+
93
def scientific_fact_finder(state: State):
    """Run the react search agent to collect supporting scientific studies.

    The (Vietnamese) instruction asks for 3-5 real studies with summaries
    and sources, grounded in the structure analysis and comment insights
    already accumulated in state.
    """
    input_message = {}
    input_message["messages"] = [
        {
            "role": "user",
            "content": f"""Hãy tìm 3-5 nghiên cứu khoa học thực tế (PubMed, JAMA, Circulation, Nutrients…),
            Tóm tắt số liệu, trích nguồn, gợi ý số liệu phù hợp cho từng đoạn trong script mới. Dựa trên các thông tin sau:
            Script Structure Analyzer Response: {state["script_structure_analyzer_response"]}
            Comment Insight Extractor Response: {state["comment_insight_extractor_response"]}
            """,
        }
    ]
    response = scientific_fact_agent.invoke(input_message)
    # The react agent's final answer is the last message of its transcript.
    research_insight = response["messages"][-1].content
    return {
        "research_insight_response": research_insight,
        "messages": HumanMessage(
            content="Scientific Fact Finder Response: " + research_insight
        ),
    }
113
+
114
+
115
def script_re_outline(state: State):
    """Fuse the prior analyses (carried in the message history) into a
    fresh outline for the rewritten script."""
    outline = chain_script_re_outline.invoke({"messages": state["messages"]})
    note = "Script Re-Outline Response: " + outline.content
    return {
        "script_re_outline_response": outline.content,
        "messages": HumanMessage(content=note),
    }
123
+
124
+
125
def script_writer_init(state: State):
    """Reset the writing loop: empty output list, chunk index back to zero."""
    start_note = HumanMessage(content="Starting script generation process...")
    return {
        "script_writer_response": [],
        "current_script_index": 0,
        "messages": start_note,
    }
132
+
133
+
134
def script_writer_single(state: State):
    """Generate one script chunk and append it to the accumulated output.

    The per-chunk word budget is target_word_count / script_count.  The
    first chunk and follow-up chunks use different (Vietnamese) user
    prompts; the follow-up prompt doubles as the continuation trigger the
    writer system prompt is instructed to recognize.
    """
    current_index = state.get("current_script_index", 0)
    script_count = state.get("script_count", 10)
    target_word_count = state.get("target_word_count", 8000)
    # Guard against division by zero if script_count was never computed.
    words_per_script = target_word_count // script_count if script_count > 0 else 1000

    # Get existing scripts (copies — avoid mutating state in place).
    script_out = list(state.get("script_writer_response", []))
    current_messages = list(state["messages"])

    # Add word count guidance to the prompt.
    if current_index == 0:
        word_prompt = f"Hãy viết script đầu tiên với khoảng {words_per_script} từ."
    else:
        word_prompt = f"ok, viết cho tôi phần tiếp theo, bám sát cấu trúc, khoảng {words_per_script} từ cho script này, các công thức tạo cảm xúc và đừng quên đối tượng khán giả là người Mỹ,giới tính nữ, trên 20 tuổi, bắt đầu, trình bày thành dạng câu văn liền mạch, dùng để làm văn nói cho video YouTube, không dùng icon"

    current_messages.append(HumanMessage(content=word_prompt))

    # Generate the chunk from the full conversation so far.
    response = chain_script_writer.invoke({"messages": current_messages})
    script_out.append(response.content)

    # Add response to message history.
    current_messages.append(AIMessage(content=response.content))

    # NOTE(review): returning the full message list through the add_messages
    # reducer re-merges existing messages by id and appends the new ones;
    # confirm this does not duplicate history entries lacking stable ids.
    return {
        "script_writer_response": script_out,
        "current_script_index": current_index + 1,
        "messages": current_messages
        + [
            HumanMessage(content=f"Script {current_index + 1}/{script_count} completed")
        ],
    }
168
+
169
+
170
def should_continue_writing(state: "State"):
    """Route the writing loop: another chunk, or finish.

    Returns the name of the next graph node — "script_writer_single"
    while fewer than ``script_count`` chunks have been written,
    "script_writer_end" once the target is reached.
    """
    written = state.get("current_script_index", 0)
    wanted = state.get("script_count", 10)
    if written < wanted:
        return "script_writer_single"
    return "script_writer_end"
177
+
178
+
179
def script_writer_end(state: State):
    """Emit a completion notice once every script chunk has been written."""
    total = len(state.get("script_writer_response", []))
    done_note = HumanMessage(content=f"All {total} scripts completed!")
    return {"messages": done_note}
src/agents/agent_transcript/prompt.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.prompts import ChatPromptTemplate
from src.config.llm import llm_2_0 as llm, llm_2_5_flash_preview
from pydantic import BaseModel, Field
from langchain_community.tools import DuckDuckGoSearchResults
from langgraph.prebuilt import create_react_agent

# Web-search tool handed to the scientific-fact-finder react agent.
duckduckgo_search = DuckDuckGoSearchResults(max_results=10, output_format="json")

# Prompt: dissect the original transcript into outline, structural formula
# (AIDA/PAS/BFB), hook, transitions, CTA, and strengths/weaknesses.
# Prompt text is runtime data and is intentionally left in Vietnamese.
script_structure_analyzer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Vai trò: Bạn là Script Structure Analyzer trong một workflow của một nhóm các agent.
            Instruction:
            - Tự động phân tích kịch bản gốc, tách các phần (Mở bài, Thân bài, Điểm chốt, CTA)
            - Xác định công thức cấu trúc (AIDA, PAS, BFB,...)
            - Trích xuất hook, câu chuyển đoạn, CTA
            - Phát hiện điểm mạnh/yếu/chỗ lạc nhịp

            Input: Script gốc
            Output:
            - Outline:
                - Mở bài
                - Thân bài
                - Điểm chốt
                - CTA
            - Công thức cấu trúc
                - AIDA: Attention, Interest, Desire, Action
                - PAS: Problem, Agitation, Solution
                - BFB: Belief, Feeling, Behavior
            - Hook
            - Câu chuyển đoạn
            - CTA
            - Điểm mạnh/yếu/chỗ lạc nhịp
            """,
        ),
        ("user", "input script: {script}"),
    ]
)

# Prompt: extract audience insights (repeated questions, pains/benefits,
# audience language) from comments and compare against the structure analysis.
comment_insight_extractor_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Vai trò: Bạn là Comment Insight Extractor trong một workflow của một nhóm các agent phân tích youtube video.
            Instruction:
            - Đọc, phân tích tất cả comment, trích xuất insight
            - lọc ra các câu hỏi lặp lại, nỗi sợ/mong muốn/lợi ích/ngôn ngữ quen thuộc
            - So sánh insight với script gốc và xác định thiếu sót.

            Input:
            - Output từ Script Structure Analyzer Agent Youtube Video
            - Comment

            Output:
            - Insights Table:
                - Insight
                - Original Comment
                - Pain or Benefit
                - Suggest for Script
            - Missing From Script
            - Repeated Questions
            - Audience Language

            """,
        ),
        ("user", "input comment: {comment}"),
        (
            "user",
            "input script_structure_analyzer_response: {script_structure_analyzer_response}",
        ),
    ]
)

# Prompt: system instruction for the react agent that researches 3-5 real
# scientific studies supporting the new script; conversation is injected
# via the "messages" placeholder.
scientific_fact_finder_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Vai trò: Bạn là Scientific Fact Finder trong một workflow của một nhóm các agent phân tích youtube video.
            Instruction:
            - Tự động research 3-5 nghiên cứu khoa học thực tế (PubMed, JAMA, Circulation, Nutrients…), tóm tắt số liệu, trích nguồn, gợi ý số liệu phù hợp cho từng đoạn trong script mới.
            - So sánh fact science với script gốc và xác định thiếu sót.

            Input:
            - Output từ Script Structure Analyzer Agent Youtube Video
            - Output từ Comment Insight Extractor Agent Youtube Video

            Output List:
            - Title: Tên nghiên cứu
            - Summary: Tóm tắt nghiên cứu
            - Source: Nguồn nghiên cứu
            - Relevant for Section: Relevant cho section nào trong script mới
            """,
        ),
        ("placeholder", "{messages}"),
    ]
)

# Prompt: merge the old outline, comment insights, and research facts into a
# new outline (hook, section order, CTA placement, transitions).
script_re_outline_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Vai trò: Bạn là Script Re-Outline Agent trong một workflow của một nhóm các agent.
            Instruction:
            Kết hợp outline cũ, insight từ comment, fact từ research để lập outline mới: Hook mới, thứ tự section mới, CTA mới, các ý chuyển mạch rõ ràng, phân bổ fact/nghiên cứu vào các section.

            Input:
            - Output từ Script Structure Analyzer Agent
            - Output từ Comment Insight Extractor Agent
            - Output từ Scientific Fact Finder Agent

            Output:

            - Outline mới: (Section, summary, suggested length, facts to include)
                - Hook mở bài
                - Thân bài 1
                - Thân bài 2
                - Điểm chốt
                - CTA
            - CTA position
            - Transitions
            - Order Logic
            """,
        ),
        ("placeholder", "{messages}"),
    ]
)

# Prompt: write ONE script section per invocation as continuous spoken-word
# prose (no icons/bullets), resuming when the user sends the continuation
# phrase that script_writer_single injects.
script_writer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Vai trò: Bạn là Script Writer dựa trên các nội dung, insight được cung cấp.
            Instruction:
            - Viết lại từng phần dựa theo outline mới, dữ liệu nghiên cứu, insight comment, giữ văn liền mạch - cảm xúc - kể chuyện, format cho video YouTube (dạng văn nói, không dùng icon, chỉ text).
            - Viết theo hội thoại chỉ có một người nói, không có người khác.

            Input:
            - Output từ Script Re-Outline Agent (Important)
            - Output từ Scientific Fact Finder Agent
            - Output từ Comment Insight Extractor Agent

            Processing:
            - Sau khi viết 1 phần, ngừng ngay.
            - Output phải liền mạch, không có gạch đầu dòng.
            - Tone giọng thân thiện, kể truyện, truyền cảm xúc, không dùng icon, chỉ dùng text.
            - Cài hook cảm xúc, ví dụ thực tế
            - Kể mở ra CTA hoặc dẫn sang phần tiếp theo.
            - Có câu hỏi tu từ nhẹ nhàng
            - Nhắc lại lợi ích quan trọng
            - So sánh "thay vì... thì..." để khán giả thấy rõ "why"
            - Không dùng icon, emoji
            - Kết thúc phải là kết thúc mở đề người dùng có thể yêu cầu viết tiếp thay vì kết thúc sau khi hoàn thành đủ hook, thân bài, điểm chốt, CTA.
            Output:
            - Title: Tên của phần nội dung
            - Content: Script content

            Lưu ý: Chỉ gen ra một phần nội dung.
            - Nếu user nhập 'ok, viết cho tôi phần tiếp theo, bám sát cấu trúc, số lượng từ cho mỗi mục trong outline, các công thức tạo cảm xúc và đừng quên đối tượng khán giả là người Mỹ,giới tính nữ, trên 20 tuổi, bắt đầu, trình bày thành dạng câu văn liền mạch, dùng để làm văn nói cho video YouTube, không dùng icon' thì tiếp tục viết tiếp.

            """,
        ),
        ("placeholder", "{messages}"),
    ]
)


# Runnable chains consumed by func.py's workflow nodes.
chain_script_structure_analyzer = script_structure_analyzer_prompt | llm
chain_comment_insight_extractor = comment_insight_extractor_prompt | llm
# React agent: LLM + DuckDuckGo search loop for fact finding.
scientific_fact_agent = create_react_agent(
    model=llm,
    tools=[duckduckgo_search],
    prompt=scientific_fact_finder_prompt,
)


chain_script_re_outline = script_re_outline_prompt | llm
# The writer uses the 2.5-flash-preview model for longer-form generation.
chain_script_writer = script_writer_prompt | llm_2_5_flash_preview
src/agents/agent_transcript/tools.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from langchain_core.tools import tool
2
+ # from src.utils.helper import convert_list_context_source_to_str
3
+ # from src.utils.logger import logger
4
+ # from langchain_core.runnables import RunnableConfig
5
+ # from langchain_experimental.utilities import PythonREPL
6
+ # from langchain_community.tools import DuckDuckGoSearchRun
7
+
8
+
9
+ # duckduckgo_search = DuckDuckGoSearchRun(max_results=10, output_format="json")
10
+
11
+ # python_exec = PythonREPL()
12
+
13
+
14
+ # @tool
15
+ # def retrieve_document(query: str, config: RunnableConfig):
16
+ # """Ưu tiên truy xuất tài liệu từ vector store nếu câu hỏi liên quan đến vai trò của chatbot.
17
+
18
+ # Args:
19
+ # query (str): Câu truy vấn của người dùng bằng tiếng Việt
20
+ # Returns:
21
+ # str: Retrieved documents
22
+ # """
23
+ # configuration = config.get("configurable", {})
24
+ # bot_id = configuration.get("bot_id", None)
25
+ # if not bot_id:
26
+ # logger.error("Bot ID is not found")
27
+ # return {"context_str": "", "selected_documents": [], "selected_ids": []}
28
+ # retriever = test_rag_vector_store.as_retriever(
29
+ # search_type="similarity_score_threshold",
30
+ # search_kwargs={"k": 5, "score_threshold": 0.3},
31
+ # )
32
+ # documents = retriever.invoke(query, filter={"bot_id": bot_id})
33
+ # selected_documents = [doc.__dict__ for doc in documents]
34
+ # selected_ids = [doc["id"] for doc in selected_documents]
35
+ # context_str = convert_list_context_source_to_str(documents)
36
+
37
+ # return {
38
+ # "context_str": context_str,
39
+ # "selected_documents": selected_documents,
40
+ # "selected_ids": selected_ids,
41
+ # }
42
+
43
+
44
+ # @tool
45
+ # def python_repl(code: str):
46
+ # """
47
+ # A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.
48
+
49
+ # Args:
50
+ # code (str): Python code to execute
51
+ # Returns:
52
+ # str: Output of the Python code
53
+ # """
54
+ # return python_exec.run(code)
src/agents/base/flow.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, START, END
2
+ from src.config.llm import llm_2_0
3
+ from .func import State
4
+ from langgraph.graph.state import CompiledStateGraph
5
+
6
+
7
+ class PrimaryChatBot:
8
+ def __init__(self):
9
+ self.builder = StateGraph(State)
10
+
11
+ @staticmethod
12
+ def routing(state: State):
13
+ pass
14
+
15
+ def node(self):
16
+ pass
17
+
18
+ def edge(self):
19
+ pass
20
+ def __call__(self) -> CompiledStateGraph:
21
+ self.node()
22
+ self.edge()
23
+ return self.builder.compile()
src/agents/base/func.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from typing import TypedDict
2
+
3
+ class State(TypedDict):
4
+ pass
src/apis/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/apis/__pycache__/create_app.cpython-311.pyc ADDED
Binary file (1.01 kB). View file
 
src/apis/controllers/__pycache__/category_controller.cpython-311.pyc ADDED
Binary file (11.5 kB). View file
 
src/apis/controllers/__pycache__/order_controller.cpython-311.pyc ADDED
Binary file (18.5 kB). View file
 
src/apis/controllers/__pycache__/service_controller.cpython-311.pyc ADDED
Binary file (9.87 kB). View file
 
src/apis/controllers/__pycache__/user_controller.cpython-311.pyc ADDED
Binary file (8.12 kB). View file
 
src/apis/controllers/__pycache__/user_service_controller.cpython-311.pyc ADDED
Binary file (8.53 kB). View file
 
src/apis/create_app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, APIRouter
from fastapi.middleware.cors import CORSMiddleware
from src.apis.routers.gen_script import router as gen_script_router


# Top-level router aggregating every feature router exposed by the service.
api_router = APIRouter()
api_router.include_router(gen_script_router)


def create_app():
    """Create the FastAPI application with docs served at "/" and open CORS.

    Returns:
        FastAPI: the configured (but router-less) application; callers
        attach ``api_router`` themselves (see app.py).
    """
    app = FastAPI(
        docs_url="/",
        title="AI Service ABAOXOMTIEU",
    )
    # NOTE(review): allow_origins=["*"] combined with allow_credentials=True
    # is contradictory — browsers reject credentialed responses carrying a
    # wildcard Access-Control-Allow-Origin. Confirm whether credentials are
    # actually needed, or list explicit origins instead.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    return app
src/apis/interfaces/__pycache__/auth_interface.cpython-311.pyc ADDED
Binary file (1.76 kB). View file
 
src/apis/interfaces/__pycache__/chat_interface.cpython-311.pyc ADDED
Binary file (1.92 kB). View file
 
src/apis/interfaces/__pycache__/file_processing_interface.cpython-311.pyc ADDED
Binary file (2.38 kB). View file
 
src/apis/middlewares/__pycache__/auth_middleware.cpython-311.pyc ADDED
Binary file (2.21 kB). View file
 
src/apis/middlewares/auth_middleware.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Annotated
from fastapi import Depends
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.responses import JSONResponse
from src.apis.providers.jwt_provider import jwt_provider as jwt
from src.apis.models.user_models import get_user
from src.config.mongo import UserCRUD
from bson import ObjectId
from jose import JWTError
from src.utils.logger import logger

security = HTTPBearer()


async def get_current_user(
    credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)]
):
    """FastAPI dependency: resolve the bearer token to the current user.

    Returns the mapped user (via ``get_user``) on success, or a 401
    ``JSONResponse`` when the token is missing/invalid or no user matches.

    NOTE(review): returning a JSONResponse from a *dependency* does not
    short-circuit the request — FastAPI injects it as the dependency's
    value. Raising ``HTTPException(status_code=401)`` is the conventional
    fix; left unchanged here to preserve the existing error contract.
    """
    try:
        token = credentials.credentials
        if not token:
            return JSONResponse(
                content={"msg": "Authentication failed"}, status_code=401
            )
        payload = jwt.decrypt(token)
        user_id: str = payload["id"]
        if not user_id:
            return JSONResponse(
                content={"msg": "Authentication failed"}, status_code=401
            )
        user = await UserCRUD.read_one({"_id": ObjectId(user_id)})
        # Bug fix: check for a missing user BEFORE dereferencing it — the
        # original called user.get("email") first, which raises
        # AttributeError when the lookup returns None.
        if not user:
            return JSONResponse(
                content={"msg": "Authentication failed"}, status_code=401
            )
        logger.info(f"Request of user: {user.get('email', None)}")
        return get_user(user)
    except JWTError:
        return JSONResponse(content={"msg": "Authentication failed"}, status_code=401)
src/apis/models/BaseDocument.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
from src.utils.logger import get_date_time


class BaseDocument(BaseModel):
    """Common timestamp fields shared by the MongoDB document models."""

    # Creation time, stored naive (tzinfo stripped) so Mongo round-trips it
    # cleanly. NOTE(review): assumes get_date_time() returns an aware
    # datetime — confirm against src.utils.logger.
    created_at: Optional[datetime] = Field(
        default_factory=lambda: get_date_time().replace(tzinfo=None)
    )
    # Last-modified time; callers are expected to refresh this on updates.
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: get_date_time().replace(tzinfo=None)
    )
    # When set, can back a Mongo TTL index to auto-expire the document.
    expire_at: Optional[datetime] = None

    class Config:
        # Permit non-pydantic field types (e.g. bson.ObjectId) in subclasses.
        arbitrary_types_allowed = True
src/apis/models/__pycache__/BaseDocument.cpython-311.pyc ADDED
Binary file (1.66 kB). View file
 
src/apis/models/__pycache__/bot_models.cpython-311.pyc ADDED
Binary file (1.26 kB). View file
 
src/apis/models/__pycache__/category_models.cpython-311.pyc ADDED
Binary file (3.98 kB). View file
 
src/apis/models/__pycache__/grade_models.cpython-311.pyc ADDED
Binary file (1.83 kB). View file
 
src/apis/models/__pycache__/order_models.cpython-311.pyc ADDED
Binary file (7.71 kB). View file
 
src/apis/models/__pycache__/service_model.cpython-311.pyc ADDED
Binary file (4.76 kB). View file
 
src/apis/models/__pycache__/service_provide.cpython-311.pyc ADDED
Binary file (4.77 kB). View file
 
src/apis/models/__pycache__/user_models.cpython-311.pyc ADDED
Binary file (2.62 kB). View file
 
src/apis/providers/__pycache__/jwt_provider.cpython-311.pyc ADDED
Binary file (2.25 kB). View file
 
src/apis/routers/__pycache__/api_testing_router.cpython-311.pyc ADDED
Binary file (4.96 kB). View file
 
src/apis/routers/__pycache__/auth_router.cpython-311.pyc ADDED
Binary file (4.73 kB). View file
 
src/apis/routers/__pycache__/custom_chatbot_router.cpython-311.pyc ADDED
Binary file (8.41 kB). View file
 
src/apis/routers/__pycache__/file_processing_router.cpython-311.pyc ADDED
Binary file (10.5 kB). View file
 
src/apis/routers/__pycache__/gen_script.cpython-311.pyc ADDED
Binary file (7.34 kB). View file
 
src/apis/routers/__pycache__/grade_code_router.cpython-311.pyc ADDED
Binary file (7.51 kB). View file
 
src/apis/routers/__pycache__/graded_assignment_router.cpython-311.pyc ADDED
Binary file (5.72 kB). View file
 
src/apis/routers/__pycache__/image_generation.cpython-311.pyc ADDED
Binary file (4 kB). View file
 
src/apis/routers/__pycache__/rag_agent_template.cpython-311.pyc ADDED
Binary file (21.1 kB). View file
 
src/apis/routers/__pycache__/vector_store_router.cpython-311.pyc ADDED
Binary file (5.35 kB). View file
 
src/apis/routers/gen_script.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from fastapi.responses import StreamingResponse
3
+ from langchain_core.messages import AIMessageChunk
4
+ from langchain_core.runnables import RunnableConfig
5
+ from src.agents.agent_transcript.flow import script_writer_agent
6
+ from src.utils.logger import logger
7
+ from pydantic import BaseModel
8
+ import json
9
+ import asyncio
10
+
11
+
12
class GenScriptRequest(BaseModel):
    """Request payload for the script-generation endpoints."""

    # Source video URL the agent pulls the transcript/comments from.
    video_link: str
    # Target total word count for the generated scripts.
    # NOTE(review): the previous inline comment said "Default 2500 words"
    # while the actual default is 50000 — the comment was stale. The value
    # is kept unchanged to preserve behavior; confirm 50000 is intended.
    target_word_count: int = 50000
15
+
16
+
17
# FastAPI router exposing the script-generation endpoints; presumably
# mounted by the application factory elsewhere in the project.
router = APIRouter()
18
+
19
+
20
def _preview(text: str, limit: int = 500) -> str:
    """Return *text* truncated to *limit* characters with an ellipsis suffix."""
    return text[:limit] + "..." if len(text) > limit else text


def _sse(payload: dict) -> str:
    """Serialize *payload* as one Server-Sent-Events ``data:`` frame.

    ``default=str`` guards against non-JSON-serializable values (LangChain
    message metadata, datetimes, ...). The original code applied it only to
    the ``state_update`` frame, so any other frame — notably
    ``message_chunk``, which embeds raw ``metadata`` — could raise
    ``TypeError`` mid-stream.
    """
    return f"data: {json.dumps(payload, default=str)}\n\n"


async def message_generator(
    input_graph: dict,
    config: RunnableConfig,
):
    """Run the script-writer agent and yield its progress as SSE frames.

    Args:
        input_graph: Initial graph state (``video_link``,
            ``target_word_count``).
        config: LangChain runnable configuration forwarded to the agent.

    Yields:
        str: SSE frames — AI message chunks, typed extraction events
        (transcript / comment / script count / per-script progress), raw
        state updates, a final result, and error frames.
    """
    try:
        last_output_state = None

        try:
            async for event in script_writer_agent.astream(
                input=input_graph, stream_mode=["messages", "values"], config=config
            ):
                try:
                    event_type, event_message = event
                    logger.info(f"Event type: {event_type}")

                    if event_type == "messages":
                        message, metadata = event_message
                        if isinstance(message, AIMessageChunk):
                            # Stream AI message chunks. LangGraph publishes
                            # the producing node under "langgraph_node";
                            # keep "node" as a fallback for older versions.
                            node = metadata.get(
                                "langgraph_node", metadata.get("node")
                            )
                            chunk_data = {
                                "type": "message_chunk",
                                "content": message.content,
                                "metadata": metadata,
                                "node_step": node,
                            }
                            logger.info(f"Chunk data: {chunk_data}")
                            yield _sse(chunk_data)

                    elif event_type == "values":
                        # Remember the latest full state so the final_result
                        # frame can be built once streaming ends.
                        last_output_state = event_message

                        if "transcript" in event_message and event_message["transcript"]:
                            yield _sse({
                                "type": "transcript_extracted",
                                "transcript": _preview(event_message["transcript"]),
                                "full_length": len(event_message["transcript"]),
                            })

                        if "comment" in event_message and event_message["comment"]:
                            yield _sse({
                                "type": "comment_extracted",
                                "comment": _preview(event_message["comment"]),
                                "full_length": len(event_message["comment"]),
                            })

                        if "script_count" in event_message:
                            yield _sse({
                                "type": "script_count_calculated",
                                "script_count": event_message["script_count"],
                                "target_word_count": event_message.get(
                                    "target_word_count", 8000
                                ),
                            })

                        # Per-script progress update while scripts are being
                        # written one by one.
                        if (
                            "script_writer_response" in event_message
                            and "current_script_index" in event_message
                        ):
                            current_scripts = event_message["script_writer_response"]
                            current_index = event_message["current_script_index"]
                            script_count = event_message.get("script_count", 10)

                            if current_scripts:
                                yield _sse({
                                    "type": "individual_script",
                                    "script_index": current_index,
                                    "script_content": current_scripts[-1],
                                    "progress": f"{current_index}/{script_count}",
                                    "scripts": current_scripts,
                                })

                        yield _sse({"type": "state_update", "state": event_message})

                except Exception as e:
                    # Per-event failure: report it but keep consuming the
                    # stream instead of aborting the whole response.
                    logger.error(f"Error processing event: {e}")
                    yield _sse({"type": "error", "message": str(e)})

        except Exception as e:
            logger.error(f"Error in streaming: {e}")
            yield _sse({"type": "error", "message": str(e)})

        # Send the final aggregated result (also after a mid-stream error,
        # matching the original control flow).
        if last_output_state:
            scripts = last_output_state.get("script_writer_response", [])
            yield _sse({
                "type": "final_result",
                "scripts": scripts,
                "total_scripts": len(scripts),
            })

    except Exception as e:
        logger.error(f"Fatal error in message_generator: {e}")
        yield _sse({"type": "fatal_error", "message": str(e)})
121
+
122
+
123
@router.post("/gen-script")
async def gen_script(request: GenScriptRequest):
    """
    Generate scripts with a streaming (Server-Sent-Events) response.

    The generator emits SSE-style frames, so the response advertises
    ``text/event-stream`` directly via ``media_type``. The original passed
    ``media_type="text/plain"`` and then overrode it with a manual
    ``Content-Type`` header — a contradiction some clients and proxies
    resolve the wrong way.
    """
    config = RunnableConfig()
    input_graph = {
        "video_link": request.video_link,
        "target_word_count": request.target_word_count,
    }

    return StreamingResponse(
        message_generator(input_graph, config),
        media_type="text/event-stream",
        headers={
            # Prevent intermediaries from buffering the event stream.
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )
143
+
144
+
145
@router.post("/gen-script-sync")
def gen_script_sync(request: GenScriptRequest):
    """
    Generate scripts with synchronous response (non-streaming)
    """
    # Build the initial graph state, then run the agent to completion.
    graph_input = {
        "video_link": request.video_link,
        "target_word_count": request.target_word_count,
    }
    result_state = script_writer_agent.invoke(graph_input)

    # Extract the scripts once instead of reading the state twice.
    scripts = result_state.get("script_writer_response", [])
    return {
        "scripts": scripts,
        "total_scripts": len(scripts),
        "full_response": result_state,
    }
src/config/__pycache__/cloudinary.cpython-311.pyc ADDED
Binary file (5.31 kB). View file
 
src/config/__pycache__/constants.cpython-311.pyc ADDED
Binary file (595 Bytes). View file