File size: 4,238 Bytes
0644a6d
 
 
 
 
 
 
 
 
d7adf93
 
 
0644a6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7adf93
 
 
0644a6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7adf93
 
 
 
0644a6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7adf93
 
0644a6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from smolagents import Tool
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from wikipedia_utils import *
from youtube_utils import *


class MathModelQuerer(Tool):
    """smolagents Tool wrapping a local math-specialised causal language model.

    The tokenizer, model weights and generation config are all loaded at
    construction time; ``forward`` then answers free-form math problems.
    """

    name = "math_model"
    description = "Solves advanced math problems using a pretrained\
    large language model specialized in mathematics. Ideal for symbolic reasoning, \
    calculus, algebra, and other technical math queries."

    inputs = {
        "problem": {
            "type": "string",
            "description": "Math problem to solve.",
        }
    }

    output_type = "string"

    def __init__(self, model_name: str = "deepseek-ai/deepseek-math-7b-base"):
        """Load tokenizer, model and generation config for ``model_name``."""
        print(f"Loading math model: {model_name}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        print("loaded tokenizer")
        # bfloat16 halves memory versus float32 for the 7B checkpoint.
        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
        print("loaded auto model")

        self.model.generation_config = GenerationConfig.from_pretrained(model_name)
        print("loaded config")  # fixed log typo (was "coonfig")

        # Without an explicit pad token id, generate() warns or fails when
        # the checkpoint defines none; reuse EOS as is conventional.
        self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id
        print("loaded pad token")

    def forward(self, problem: str) -> str:
        """Generate up to 100 new tokens answering ``problem``.

        Returns the decoded output (which includes the prompt text), or a
        failure message if generation raises.
        """
        try:
            print(f"[MathModelTool] Question: {problem}")

            # Renamed from `inputs` to avoid shadowing the class attribute.
            encoded = self.tokenizer(problem, return_tensors="pt")
            outputs = self.model.generate(**encoded, max_new_tokens=100)

            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as err:
            # Narrowed from a bare `except:` (which also swallowed
            # SystemExit/KeyboardInterrupt) and surface the actual error.
            print(f"[MathModelTool] Error: {err}")
            return f"Failed using the tool {self.name}"


class CodeModelQuerer(Tool):
    """smolagents Tool that delegates coding tasks to a hosted HF model."""

    name = "code_querer"
    description = "Generates code snippets based on a natural language description of a\
    programming task using a powerful coding-focused language model. Suitable\
    for solving coding problems, generating functions, or implementing algorithms."

    inputs = {
        "problem": {
            "type": "string",
            "description": "Description of a code sample to be generated",
        }
    }

    output_type = "string"

    def __init__(self, model_name: str = "Qwen/Qwen2.5-Coder-32B-Instruct"):
        """Create the hosted-API client for ``model_name``."""
        from smolagents import HfApiModel
        print(f"Loading llm for Code tool: {model_name}")
        # Bug fix: `model_name` was previously ignored — `HfApiModel()` was
        # built with no arguments, so the default endpoint was used despite
        # the log line above. Pass the requested model through.
        self.model = HfApiModel(model_id=model_name)

    def forward(self, problem: str) -> str:
        """Ask the remote model for code solving ``problem`` (max 512 new tokens)."""
        try:
            return self.model.generate(problem, max_new_tokens=512)
        except Exception as err:
            # Narrowed from a bare `except:`; log the cause before returning
            # the tool's standard failure string.
            print(f"[CodeModelQuerer] Error: {err}")
            return f"Failed using the tool {self.name}"


class WikipediaPageFetcher(Tool):
    """smolagents Tool that looks up a topic on Wikipedia and returns a summary."""

    name = "wiki_page_fetcher"
    description = ' Searches and fetches summaries from Wikipedia for any topic,\
    across all supported languages and versions. Only a single query string is required as input.'

    inputs = {
        "query": {
            "type": "string",
            "description": "Topic of wikipedia search",
        }
    }

    output_type = "string"

    def forward(self, query: str) -> str:
        """Fetch a Wikipedia page/summary for ``query``, or a failure message."""
        try:
            # Bug fix: the parameter `query` shadowed the module-level
            # `query` helper (brought in by `from wikipedia_utils import *`),
            # so `query(query)` tried to call a string and always raised
            # TypeError, which the bare except then silently hid. Reach the
            # shadowed helper via globals().
            # TODO(review): confirm wikipedia_utils exports `query`; renaming
            # the helper or importing the module explicitly would be cleaner.
            build_query = globals()["query"]
            wiki_page = fetch_wikipedia_page(build_query(query))
            return wiki_page
        except Exception as err:
            # Narrowed from a bare `except:`; log the cause for debuggability.
            print(f"[WikipediaPageFetcher] Error: {err}")
            return f"Failed using the tool {self.name}"


class YoutubeTranscriptFetcher(Tool):
    """smolagents Tool that returns the English transcript of a YouTube video."""

    name = "youtube_transcript_fetcher"
    description = "Fetches the English transcript of a YouTube video using either a direct video \
    ID or a URL that includes one. Accepts a query containing the link or the raw video ID directly. Returns the transcript as plain text."

    inputs = {
        "query": {
            "type": "string",
            "description": "A query that includes youtube id."
        },
        "video_id" : {
            "type" : "string",
            "description" : "Optional string with video id from youtube.",
            "nullable"  : True
        }
    }

    output_type = "string"

    def forward(self, query: str, video_id=None) -> str:
        """Return the post-processed English transcript.

        If ``video_id`` is not supplied it is extracted from ``query`` via
        ``get_youtube_video_id`` (from youtube_utils).
        """
        try:
            if video_id is None:
                video_id = get_youtube_video_id(query)

            transcript = fetch_transcript_english(video_id)
            return post_process_transcript(transcript)
        except Exception as err:
            # Narrowed from a bare `except:` so interrupts propagate and the
            # underlying error is visible in the logs.
            print(f"[YoutubeTranscriptFetcher] Error: {err}")
            return f"Failed using the tool {self.name}"