theRealNG commited on
Commit
0aba2e5
·
unverified ·
2 Parent(s): 36298774ad7a82

Merge pull request #19 from beautiful-code/course_lessons_extraction

Browse files
.gitignore CHANGED
@@ -7,3 +7,4 @@ evaluated_articles.json
7
  final_articles.json
8
  learning_profile.json
9
  pitched_articles.json
 
 
7
  final_articles.json
8
  learning_profile.json
9
  pitched_articles.json
10
+ webpageScreenshot.png
agents/article_evaluator.py CHANGED
@@ -1,7 +1,7 @@
1
  from crewai import Agent
2
  from llms.gpt import llm
3
- from tools.helpers import streamlit_callback
4
- from tools.scrape_website import scrape_tool
5
 
6
  article_evaluator = Agent(
7
  role="Recommended Article Evaluator",
 
1
  from crewai import Agent
2
  from llms.gpt import llm
3
+ from workflows.tools.helpers import streamlit_callback
4
+ from workflows.tools.scrape_website import scrape_tool
5
 
6
  article_evaluator = Agent(
7
  role="Recommended Article Evaluator",
agents/curiosity_catalyst.py CHANGED
@@ -1,7 +1,7 @@
1
  from crewai import Agent
2
  from llms.gpt import llm
3
- from tools.helpers import streamlit_callback
4
- from tools.scrape_website import scrape_tool
5
 
6
  curiosity_catalyst = Agent(
7
  role="Curiosity Catalyst",
 
1
  from crewai import Agent
2
  from llms.gpt import llm
3
+ from workflows.tools.helpers import streamlit_callback
4
+ from workflows.tools.scrape_website import scrape_tool
5
 
6
  curiosity_catalyst = Agent(
7
  role="Curiosity Catalyst",
agents/learning_curator.py CHANGED
@@ -1,8 +1,8 @@
1
  from crewai import Agent
2
  from llms.gpt import llm
3
- from tools.helpers import streamlit_callback
4
- from tools.scrape_website import scrape_tool
5
- from tools.search_web import search_tool
6
 
7
  learning_curator = Agent(
8
  role="Personal Learning Curator",
 
1
  from crewai import Agent
2
  from llms.gpt import llm
3
+ from workflows.tools.helpers import streamlit_callback
4
+ from workflows.tools.scrape_website import scrape_tool
5
+ from workflows.tools.search_web import search_tool
6
 
7
  learning_curator = Agent(
8
  role="Personal Learning Curator",
agents/learning_profiler.py CHANGED
@@ -1,7 +1,7 @@
1
  from crewai import Agent
2
  from llms.gpt import llm
3
- from tools.helpers import streamlit_callback
4
- from tools.scrape_website import scrape_tool
5
 
6
  learning_profiler = Agent(
7
  role="Personal Learning Profiler",
 
1
  from crewai import Agent
2
  from llms.gpt import llm
3
+ from workflows.tools.helpers import streamlit_callback
4
+ from workflows.tools.scrape_website import scrape_tool
5
 
6
  learning_profiler = Agent(
7
  role="Personal Learning Profiler",
endpoints.py CHANGED
@@ -1,7 +1,8 @@
1
  from dotenv import load_dotenv
2
  import uvicorn
3
  from fastapi import FastAPI, Query
4
- from .crew.til import TilCrew, TilFeedbackResponse
 
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from langsmith import Client
7
  from typing import List, Optional
@@ -45,22 +46,11 @@ async def til_feedback_kickoff(content: List[str]) -> TilFeedbackResponse:
45
  result = TilCrew().kickoff(inputs)
46
  return result
47
 
48
- class Feedback(BaseModel):
49
- helpful_score: Optional[float]
50
- feedback_on: Optional[str]
51
-
52
  @app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
53
  async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
54
  print("Helful Score: ", feedback.helpful_score)
55
  print("Feedback On: ", feedback.feedback_on)
56
- client = Client()
57
- client.create_feedback(
58
- str(run_id),
59
- key="helpful",
60
- score=feedback.helpful_score,
61
- source_info={"til": feedback.feedback_on},
62
- type="api",
63
- )
64
  return "ok"
65
 
66
  @app.get("/healthcheck")
 
1
  from dotenv import load_dotenv
2
  import uvicorn
3
  from fastapi import FastAPI, Query
4
+ from workflows.til import TilCrew, TilFeedbackResponse
5
+ from workflows.utils.feedback import Feedback
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from langsmith import Client
8
  from typing import List, Optional
 
46
  result = TilCrew().kickoff(inputs)
47
  return result
48
 
 
 
 
 
49
  @app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
50
  async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
51
  print("Helful Score: ", feedback.helpful_score)
52
  print("Feedback On: ", feedback.feedback_on)
53
+ TilCrew.post_feedback(run_id=run_id, feedback=feedback)
 
 
 
 
 
 
 
54
  return "ok"
55
 
56
  @app.get("/healthcheck")
packages.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ libnss3
2
+ libnspr4
3
+ libatk1.0-0
4
+ libatk-bridge2.0-0
5
+ libcups2
6
+ libatspi2.0-0
7
+ libxcomposite1
8
+ libxdamage1
requirements.txt CHANGED
@@ -14,3 +14,9 @@ uvicorn
14
  fastapi_cors
15
  langsmith
16
  pytest
 
 
 
 
 
 
 
14
  fastapi_cors
15
  langsmith
16
  pytest
17
+ playwright
18
+ playwright-stealth
19
+ unstructured
20
+ asyncio
21
+ psutil
22
+ pyppeteer
tasks/create_article_pitch.py CHANGED
@@ -9,7 +9,7 @@ from pydantic import BaseModel
9
  from typing import List
10
 
11
  from agents.curiosity_catalyst import curiosity_catalyst
12
- from tools.scrape_website import scrape_tool
13
  from tasks.create_learning_profile import learning_profile_task
14
  from tasks.evaluate_articles import evaluation_task
15
 
 
9
  from typing import List
10
 
11
  from agents.curiosity_catalyst import curiosity_catalyst
12
+ from workflows.tools.scrape_website import scrape_tool
13
  from tasks.create_learning_profile import learning_profile_task
14
  from tasks.evaluate_articles import evaluation_task
15
 
tools/scrape_website.py DELETED
@@ -1,14 +0,0 @@
1
- from crewai_tools import ScrapeWebsiteTool
2
- import requests
3
- from bs4 import BeautifulSoup
4
-
5
- scrape_tool = ScrapeWebsiteTool()
6
-
7
- def CustomScrapeWebsiteTool(url):
8
- response = requests.get(url)
9
- parsed = BeautifulSoup(response.content, "html.parser")
10
- text = parsed.get_text()
11
- text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
12
- text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
13
-
14
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ui/article_recommendation.py CHANGED
@@ -3,7 +3,7 @@ import json
3
  import streamlit as st
4
  import utils.settings as settings
5
 
6
- from crew.article_suggestion import article_recommendation_crew
7
  from utils.write_to_json import write_dict_to_json as write_dict_to_json
8
  load_dotenv()
9
  settings.init()
 
3
  import streamlit as st
4
  import utils.settings as settings
5
 
6
+ from workflows.article_suggestion import article_recommendation_crew
7
  from utils.write_to_json import write_dict_to_json as write_dict_to_json
8
  load_dotenv()
9
  settings.init()
ui/course_lessons_extractor.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
from contextlib import contextmanager

import streamlit as st
from dotenv import load_dotenv
from streamlit_extras.capture import stdout

from workflows.courses.lessons_extractor import LessonsExtractor

load_dotenv()


@contextmanager
def setup_event_loop():
    """Install a fresh asyncio event loop for the enclosed block.

    Streamlit script threads have no default event loop, so one is created
    and set as current, then closed and unset on exit (even on error).
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        yield loop
    finally:
        loop.close()
        asyncio.set_event_loop(None)


def main():
    """Render the Course Lesson Extractor page and run the extraction flow."""
    st.markdown("<div class='container'>", unsafe_allow_html=True)

    st.markdown(
        """
        <div class="centered">
            <p class="title">Course Lesson Extractor</p>
        </div>
        """,
        unsafe_allow_html=True
    )
    course_url = st.text_area('Enter the URL for the course:',
                              "https://www.coursera.org/learn/google-data-analytics-capstone?specialization=google-data-analytics",
                              key='course_url', help='Enter course you want to learn')

    if st.button("Get Lessons"):
        with st.status(
            "🤖 **Extracting Lessons...**", state="running", expanded=True
        ) as status:
            with st.container(height=500, border=False):
                log_container = st.empty()
                # Mirror stdout (progress prints from the extractor) into the UI.
                with stdout(log_container.code, terminator=""):
                    # The yielded loop object is unused here: the extractor
                    # drives its own async calls; we only need a loop installed.
                    with setup_event_loop():
                        extractor = LessonsExtractor()
                        inputs = {"course_url": course_url}
                        results = extractor.kickoff(inputs=inputs)["lessons"]
                        status.update(
                            label="✅ Extracted Lessons!",
                            state="complete",
                            expanded=False,
                        )

        for idx, lesson in enumerate(results):
            st.markdown(f"#### Lessons {idx}: {lesson['name']}")
            # Typo fix: rendered label read "Concpets".
            st.markdown(f"Concepts: {', '.join(lesson['concepts'])}")


if __name__ == "__main__":
    main()
ui/research_paper.py CHANGED
@@ -1,11 +1,11 @@
1
  import streamlit as st
2
- from crew.research_article_suggester import RecentArticleSuggester
3
  from streamlit_extras.capture import stdout
4
 
5
 
6
 
7
  def main():
8
-
9
  st.markdown(
10
  """
11
  <style>
 
1
  import streamlit as st
2
+ from workflows.research_article_suggester import RecentArticleSuggester
3
  from streamlit_extras.capture import stdout
4
 
5
 
6
 
7
  def main():
8
+
9
  st.markdown(
10
  """
11
  <style>
ui/til_feedback.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
- from crew.til import TilCrew
4
  from streamlit_extras.capture import stdout
5
  load_dotenv()
6
 
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
+ from workflows.til import TilCrew
4
  from streamlit_extras.capture import stdout
5
  load_dotenv()
6
 
ui_main.py CHANGED
@@ -1,16 +1,20 @@
1
- import streamlit as st
2
- import math
3
  from dotenv import load_dotenv
4
  from streamlit_extras.stylable_container import stylable_container
5
- from PIL import Image
6
  from ui.article_recommendation import main as article_recommendor_main
 
7
  from ui.research_paper import main as research_article_suggester_main
8
  from ui.til_feedback import main as feedback_main
 
 
 
9
 
10
  load_dotenv()
11
 
 
 
 
12
 
13
- st.set_page_config(page_title='Multi-Page App', page_icon='📰', layout='wide')
14
 
15
  def load_css(file_name):
16
  with open(file_name) as f:
@@ -29,24 +33,27 @@ def main():
29
  research_article_suggester_main()
30
  elif st.session_state.page == "feedback":
31
  feedback_main()
 
 
32
 
33
  def show_main_page():
34
 
35
  css = load_css("ui/main.css")
36
  st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
37
 
38
- st.markdown('<div class="main-title">Welcome to the Multi-Page App!</div>', unsafe_allow_html=True)
39
  st.markdown("---")
40
- st.markdown('<div class="sub-header">Navigate to Specific Pages:</div>', unsafe_allow_html=True)
41
 
42
  card_info = [
 
 
43
  {"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
44
  {"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
45
- {"title": "Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
46
  ]
47
 
48
  num_cols = 3
49
- num_rows = math.ceil(len(card_info) / num_cols)
50
 
51
 
52
  for row in range(num_rows):
@@ -59,7 +66,7 @@ def show_main_page():
59
  with stylable_container(
60
  key="inside_container_with_border",
61
  css_styles="""
62
- {
63
  background-color: #f8f9fa;
64
  border-radius: 10px;
65
  box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);
 
 
 
1
  from dotenv import load_dotenv
2
  from streamlit_extras.stylable_container import stylable_container
 
3
  from ui.article_recommendation import main as article_recommendor_main
4
+ from ui.course_lessons_extractor import main as lessons_extractor_main
5
  from ui.research_paper import main as research_article_suggester_main
6
  from ui.til_feedback import main as feedback_main
7
+ import math
8
+ import streamlit as st
9
+ import subprocess
10
 
11
  load_dotenv()
12
 
13
+ # Running required system commands
14
+ subprocess.run(["playwright", "install", "chromium"])
15
+
16
 
17
+ st.set_page_config(page_title='Growthy AI Workflows', page_icon='📰', layout='wide')
18
 
19
  def load_css(file_name):
20
  with open(file_name) as f:
 
33
  research_article_suggester_main()
34
  elif st.session_state.page == "feedback":
35
  feedback_main()
36
+ elif st.session_state.page == "lessons_extractor":
37
+ lessons_extractor_main()
38
 
39
  def show_main_page():
40
 
41
  css = load_css("ui/main.css")
42
  st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
43
 
44
+ st.markdown('<div class="main-title">Welcome to Growthy AI Workflows!</div>', unsafe_allow_html=True)
45
  st.markdown("---")
46
+ st.markdown('<div class="sub-header">Navigate to Specific Workflow:</div>', unsafe_allow_html=True)
47
 
48
  card_info = [
49
+ {"title": "TIL Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
50
+ {"title": "Course Lesson Extractor", "description": "Extract lessons for a given course", "key": "lessons_extractor"},
51
  {"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
52
  {"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
 
53
  ]
54
 
55
  num_cols = 3
56
+ num_rows = math.ceil(len(card_info) / num_cols)
57
 
58
 
59
  for row in range(num_rows):
 
66
  with stylable_container(
67
  key="inside_container_with_border",
68
  css_styles="""
69
+ {
70
  background-color: #f8f9fa;
71
  border-radius: 10px;
72
  box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);
workflows/__init__.py ADDED
File without changes
{crew → workflows}/article_suggestion.py RENAMED
File without changes
workflows/courses/lessons_extractor.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from langchain import callbacks
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List
from workflows.tools.scrape_website import WebpageScreenshot
from crewai import Agent, Task, Crew
from crewai_tools import ScrapeWebsiteTool
import base64
import os
import pprint


class LessonsExtractor:
    """Extract the lesson list of an online course.

    Takes a screenshot of the course webpage and asks a multimodal LLM to
    parse it into the `Course` schema (a list of `Lesson` objects).
    """

    def kickoff(self, inputs=None):
        """Run the extraction.

        Args:
            inputs: dict that must contain "course_url".

        Returns:
            {"run_id": <LangSmith run id>, "lessons": <list of lesson dicts>}
        """
        # `inputs=None` instead of a mutable `{}` default (shared across calls).
        inputs = inputs or {}
        self.course_url = inputs["course_url"]
        self._extract_lessons()

        return {"run_id": self.run_id, "lessons": self.lessons}

    def _extract_lessons(self):
        """Scrape the page, build the chain, and populate self.lessons / self.run_id."""
        self.course_webpage_content = self._scrape_webpage_content()
        extractor_chain = self._build_lessons_extractor_chain()
        pprint.pp("Extracting Lessons....")
        # collect_runs captures the traced LangSmith run so its id can be
        # returned to the caller (used later to attach feedback).
        with callbacks.collect_runs() as cb:
            self.lessons = extractor_chain.invoke(
                {"screenshot": self.course_webpage_content})["lessons"]
            self.run_id = cb.traced_runs[0].id
        print("Run ID: ", self.run_id)
        print("Lessons")
        pprint.pp(self.lessons)

    def _scrape_webpage_content(self):
        """Return a base64-encoded PNG screenshot of the course webpage."""
        pprint.pp("Scraping Courses....")
        webpage_content = WebpageScreenshot(self.course_url)
        # Debug helper: decode and write the screenshot to disk to inspect it.
        # image_data = base64.b64decode(webpage_content)
        # with open("webpageScreenshot.png", "wb") as fh:
        #     fh.write(image_data)

        print("Webpage Content:")
        pprint.pp(webpage_content)
        return webpage_content

    def _build_lessons_extractor_chain(self):
        """Build prompt | llm | parser chain that turns a screenshot into a Course.

        NOTE(review): the screenshot is baked into the HumanMessage via an
        f-string at build time, so the "screenshot" key passed to invoke()
        appears unused — confirm before relying on it for re-invocation.
        """
        course_parser = JsonOutputParser(pydantic_object=Course)
        prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                "You are an expert in understanding a course webpage. "
                "Your goal is to extract the course content that will be covered as part of the course from the screenshot of the course webpage. "
                f"Formatting Instructions: {course_parser.get_format_instructions()}"
            ),
            HumanMessage(
                content=[
                    {"type": "text", "text": "Here is the course webpage screenshot"},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/png;base64,{self.course_webpage_content}",
                        "detail": "auto",
                    }}
                ]
            )
        ])
        llm = ChatOpenAI(model=os.environ['OPENAI_MODEL'], temperature=0.2)

        # Typo fixes: variable was "extractor_chian"; metadata key was "versoin"
        # (the misspelled key would have been recorded verbatim in LangSmith).
        extractor_chain = (prompt | llm | course_parser).with_config({
            "tags": ["courses"], "run_name": "Extracting Lessons",
            "metadata": {
                "version": "v1.0.0",
                "growth_activity": "courses",
                "env": os.environ["ENV"],
                "model": os.environ["OPENAI_MODEL"],
            }
        })

        return extractor_chain


class Lesson(BaseModel):
    """One lesson parsed from the course webpage screenshot."""
    name: str = Field(description="Lesson name mentioned in the screenshot.")
    concepts: List[str] = Field(description="What are the concepts mentioned in the screenshot "
                                "that the user will learn as part of this lesson. "
                                "If nothing is mentioned return an empty list."
                                )


class Course(BaseModel):
    """Structured LLM output: the complete list of lessons for one course."""
    lessons: List[Lesson]
{crew → workflows}/research_article_suggester.py RENAMED
@@ -12,7 +12,7 @@ from langchain_core.prompts import ChatPromptTemplate
12
  from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
13
  from langchain_core.output_parsers import JsonOutputParser
14
 
15
- from tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
16
 
17
  MAX_RESULTS = 2
18
  AGE_OF_RESEARCH_PAPER = 60
 
12
  from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
13
  from langchain_core.output_parsers import JsonOutputParser
14
 
15
+ from workflows.tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
16
 
17
  MAX_RESULTS = 2
18
  AGE_OF_RESEARCH_PAPER = 60
{crew → workflows}/til.py RENAMED
@@ -6,8 +6,10 @@ from langchain_core.messages import SystemMessage
6
  from langchain_core.output_parsers import JsonOutputParser
7
  from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
8
  from langchain_openai import ChatOpenAI
 
9
  from pydantic import BaseModel, Field, UUID4
10
  from typing import List, Optional
 
11
  import os
12
  import pprint
13
 
@@ -16,10 +18,20 @@ class TilCrew:
16
  print("Human Message:")
17
  pprint.pp(inputs)
18
  self.content = inputs["content"]
19
- # self._gather_facts()
20
  self._gather_feedback()
21
  return self._final_call_on_feedback()
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  def _final_call_on_feedback(self):
24
  final_results = []
25
  for feedback in self.feedback_results:
 
6
  from langchain_core.output_parsers import JsonOutputParser
7
  from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
8
  from langchain_openai import ChatOpenAI
9
+ from langsmith import Client
10
  from pydantic import BaseModel, Field, UUID4
11
  from typing import List, Optional
12
+ from workflows.utils.feedback import Feedback
13
  import os
14
  import pprint
15
 
 
18
  print("Human Message:")
19
  pprint.pp(inputs)
20
  self.content = inputs["content"]
 
21
  self._gather_feedback()
22
  return self._final_call_on_feedback()
23
 
24
+ def post_feedback(run_id: UUID4, feedback: Feedback):
25
+ client = Client()
26
+ client.create_feedback(
27
+ str(run_id),
28
+ key=feedback.metric_type,
29
+ score=feedback.metric_score,
30
+ source_info={"til": feedback.feedback_on},
31
+ type="api",
32
+ )
33
+
34
+
35
  def _final_call_on_feedback(self):
36
  final_results = []
37
  for feedback in self.feedback_results:
{tools → workflows/tools}/helpers.py RENAMED
File without changes
workflows/tools/scrape_website.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from bs4 import BeautifulSoup
from crewai_tools import ScrapeWebsiteTool
from playwright.async_api import async_playwright
from playwright_stealth import stealth_async
from pyppeteer import launch
import asyncio
import base64
import requests


# Default CrewAI scraping tool, re-exported for agent definitions.
scrape_tool = ScrapeWebsiteTool()


def CustomScrapeWebsiteTool(url):
    """Fetch `url` with browser-like headers and return its visible text.

    Blank lines and repeated spaces are collapsed so the result is compact
    enough to hand to an LLM.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Encoding': 'gzip, deflate, br'
    }
    response = requests.get(url, headers=headers)
    parsed = BeautifulSoup(response.content, "html.parser")
    text = parsed.get_text()
    text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
    text = ' '.join([i for i in text.split(' ') if i.strip() != ''])

    return text


async def AsyncWebpageScreenshot(url):
    """Screenshot `url` with headless Chromium (Playwright).

    Returns the full-page screenshot as a base64-encoded PNG string.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        # BUG FIX: stealth_async is a coroutine; without `await` it was
        # never executed and the stealth patches were not applied.
        await stealth_async(page)
        await page.goto(url)

        screenshot_bytes = await page.screenshot(full_page=True)

        await browser.close()

        base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
        return base64_image


def WebpageScreenshot(url):
    """Synchronous wrapper around AsyncWebpageScreenshot."""
    print("Taking screenshot: ", url)
    result = asyncio.run(AsyncWebpageScreenshot(url))
    return result


async def AsyncPyppeteerWebpageScreenshot(url):
    """Screenshot `url` with pyppeteer; returns a base64-encoded PNG string.

    NOTE(review): unlike the Playwright variant, this takes a viewport-sized
    (not full-page) screenshot — confirm that is intended.
    """
    browser = await launch()
    page = await browser.newPage()
    await page.goto(url)
    screenshot_bytes = await page.screenshot()
    await browser.close()

    base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
    return base64_image


def PyppeteerWebpageScreenshot(url):
    """Synchronous wrapper around AsyncPyppeteerWebpageScreenshot."""
    print("Taking screenshot: ", url)
    result = asyncio.run(AsyncPyppeteerWebpageScreenshot(url))
    return result
{tools → workflows/tools}/search_web.py RENAMED
File without changes
workflows/utils/feedback.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from pydantic import BaseModel
from typing import List, Optional


class Feedback(BaseModel):
    """User feedback payload posted against a LangSmith run.

    All fields are optional so callers may submit partial feedback
    (e.g. only a metric, or only free-text context).
    """

    # Explicit `= None` defaults: under Pydantic v2 an Optional annotation
    # alone no longer makes a field optional — without defaults every field
    # here would be required and partial payloads would be rejected.
    helpful_score: Optional[float] = None
    metric_type: Optional[str] = None
    metric_score: Optional[float] = None
    feedback_on: Optional[str] = None