File size: 3,633 Bytes
0dd81f9
 
 
 
68114ba
0dd81f9
 
 
5177379
0dd81f9
 
5177379
 
0dd81f9
 
 
 
68114ba
 
c9c06b9
68114ba
 
5177379
0dd81f9
68114ba
0dd81f9
5177379
0dd81f9
 
 
 
 
 
 
68114ba
 
 
 
 
 
 
 
 
 
 
0dd81f9
68114ba
 
 
 
 
 
 
 
 
0dd81f9
68114ba
 
0dd81f9
 
 
 
 
68114ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from smolagents.tools import Tool
import requests
from typing import List, Dict
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util

class OdooDocumentationSearchTool(Tool):
    """Rank text snippets of an Odoo documentation page against a query.

    The tool downloads the documentation landing page for the requested
    Odoo version, extracts the text of its headings, paragraphs and list
    items, scores every snippet against the query with sentence embeddings
    (cosine similarity) and finally reranks the best candidates with a
    simple keyword count.

    Note that only the single page at
    ``https://www.odoo.com/documentation/<version>/`` is fetched; linked
    sub-pages are not followed.
    """

    name = "odoo_documentation_search"
    description = "Searches the Odoo documentation for functional or technical queries and returns related results for a specific Odoo version."

    inputs = {
        "query": {"type": "string", "description": "The search query (e.g., 'how to create a new module')"},
        "version": {"type": "string", "description": "The Odoo version to search (e.g., '16.0', '17.0', '18.0')"}
    }

    output_type = "array"

    def __init__(self, query=None):
        """Load the sentence-embedding model used for semantic ranking.

        Args:
            query: Unused. Kept only so that existing callers passing it
                keep working.
        """
        # Let the Tool base class perform its own set-up before this
        # subclass adds its state.
        super().__init__()
        # Load the SentenceTransformer model
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.is_initialized = True

    def forward(self, query: str, version: str) -> List[Dict]:
        """Search the documentation page and return the best matching snippets.

        Args:
            query: Free-text search query.
            version: Odoo version used to build the documentation URL
                (e.g. ``'17.0'``).

        Returns:
            Up to five dicts of the form ``{"Result": <text>, "Score": <str>}``
            ordered from best to worst match; an empty list when the page
            contains no usable text; or a single ``{"Error": <message>}``
            dict when the page could not be fetched.
        """
        top_k = 10  # candidates handed over to the keyword reranker
        top_n = 5  # results returned to the caller
        request_timeout = 30  # seconds; without it a stalled server blocks forever

        # Surrounding whitespace would otherwise end up inside the URL path.
        version = str(version).strip()
        base_url = f"https://www.odoo.com/documentation/{version}/"

        # Only the network call can raise RequestException, so keep the
        # guarded region limited to it.
        try:
            response = requests.get(base_url, timeout=request_timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return [{"Error": f"Error fetching Odoo documentation: {str(e)}"}]

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract relevant sections from the documentation. Blank snippets
        # carry no information and repeated snippets would crowd the top
        # results, so both are dropped (first occurrence order is kept).
        texts = (
            element.get_text().strip()
            for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
        )
        sections = list(dict.fromkeys(text for text in texts if text))
        if not sections:
            # Nothing to embed or rank.
            return []

        # Embed the sections and the query
        section_embeddings = self.model.encode(sections, convert_to_tensor=True)
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # Cosine similarity of the query against every section
        cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0]

        # Rank the sections based on similarity scores
        ranked_sections = sorted(
            zip(sections, cosine_scores), key=lambda pair: pair[1], reverse=True
        )

        # Rerank the top-k sections with the keyword-based heuristic
        reranked_sections = self.rerank_sections(ranked_sections[:top_k], query)

        # float() yields the same value as .item() for 0-dim tensors and
        # additionally accepts plain Python numbers.
        return [
            {"Result": section, "Score": str(float(score))}
            for section, score in reranked_sections[:top_n]
        ]

    def rerank_sections(self, ranked_sections: List[tuple], query: str) -> List[tuple]:
        """Rerank sections by adding a keyword-occurrence bonus to each score.

        Args:
            ranked_sections: ``(section_text, score)`` pairs.
            query: The search query the keywords are taken from.

        Returns:
            A new list of ``(section_text, adjusted_score)`` pairs sorted by
            adjusted score in descending order. The adjusted score is the
            incoming score plus the number of (case-insensitive, substring)
            occurrences of every query keyword in the section.
        """
        stop_words = {'a', 'an', 'the', 'is', 'are', 'in', 'on', 'at', 'to', 'for', 'of'}
        edge_punctuation = ".,;:!?\"'()[]{}"

        # Extract keywords from the query. Punctuation glued to a word
        # (e.g. "module?") would prevent it from ever matching, and an empty
        # keyword must be dropped because str.count("") matches everywhere.
        stripped_words = (
            word.strip(edge_punctuation) for word in query.lower().split()
        )
        query_keywords = [
            word for word in stripped_words if word and word not in stop_words
        ]

        reranked_sections = []
        for section, score in ranked_sections:
            # Lower-case once per section rather than once per keyword.
            lowered = section.lower()
            keyword_score = sum(lowered.count(keyword) for keyword in query_keywords)

            # Adjust the similarity score with the keyword bonus
            reranked_sections.append((section, score + keyword_score))

        # Sort the sections based on the adjusted scores
        reranked_sections.sort(key=lambda pair: pair[1], reverse=True)

        return reranked_sections