File size: 2,416 Bytes
2319518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import copy
from typing import Dict, List

from qwen_agent.actions import GenKeyword
from qwen_agent.memory.similarity_search import SimilaritySearch
from qwen_agent.utils.tokenization_qwen import count_tokens


# TODO: Design the interface.
class Memory:
    """Pick reference text for a query out of a list of parsed records.

    Each record is read as a dict with a ``'raw'`` list of page dicts and a
    ``'url'`` entry. Every page carries ``'page_content'`` (a string) and,
    optionally, a ``'token'`` count for that string.
    """

    def __init__(self, llm=None, stream=False):
        """Create the search and keyword-generation helpers.

        Args:
            llm: Forwarded to ``GenKeyword``.
            stream: Forwarded to ``GenKeyword``.
        """
        self.search_agent = SimilaritySearch()
        self.keyword_agent = GenKeyword(llm=llm, stream=stream)

    @staticmethod
    def _with_token_count(page: dict) -> dict:
        """Return ``page`` unchanged if it has a 'token' key, else a counted copy."""
        if 'token' in page:
            return page
        counted = copy.deepcopy(page)
        counted['token'] = count_tokens(page['page_content'])
        return counted

    def get(self, query: str, records: list, max_token=4000) -> List[Dict]:
        """Collect reference snippets for ``query`` from ``records``.

        Records whose ``'raw'`` list is empty are skipped. Pages that lack a
        ``'token'`` entry get one computed with ``count_tokens``; in that case
        the record's ``'raw'`` entry is reassigned, so the caller's record
        keeps the computed counts.

        Args:
            query: Passed through to the search agent.
            records: Records to search.
            max_token: Total token budget, divided evenly among the usable
                records.

        Returns:
            The search agent's results that have a non-empty ``'text'``; if
            none do, the output of :meth:`get_top`. An empty list when there
            is no usable record.
        """
        usable_records = []
        for record in records:
            pages = record['raw']
            if not pages:
                continue
            # Test for the *key* on every page. Checking for the substring
            # 'token' inside the page text would skip counting for any page
            # that merely mentions the word, and get_top() would then fail on
            # the missing page['token'].
            if any('token' not in page for page in pages):
                record['raw'] = [
                    self._with_token_count(page) for page in pages
                ]
            usable_records.append(record)

        # Nothing to search: bail out before dividing by the record count.
        if not usable_records:
            return []

        single_max_token = int(max_token / len(usable_records))
        _ref_list = []
        for record in usable_records:
            now_ref_list = self.search_agent.run(record, query,
                                                 single_max_token,
                                                 self.keyword_agent)
            if now_ref_list['text']:
                _ref_list.append(now_ref_list)

        if not _ref_list:
            # The search produced no text for any record; fall back to the
            # leading pages of each record.
            _ref_list = self.get_top(usable_records,
                                     single_max_token=single_max_token)

        return _ref_list

    def get_top(self, records: list, single_max_token=4000):
        """Take the leading pages of every record within a token budget.

        Pages are appended whole, in order, while their cumulative
        ``'token'`` count stays within ``single_max_token``. The first page
        that would exceed the budget is cut to a character prefix and ends
        the scan for that record.

        Args:
            records: Records whose pages already carry a ``'token'`` count.
            single_max_token: Token budget applied to each record separately.

        Returns:
            One ``{'url': ..., 'text': [...]}`` dict per record, in input
            order.
        """
        _ref_list = []
        for record in records:
            used_tokens = 0
            selected = {'url': record['url'], 'text': []}
            for page in record['raw']:
                content = page['page_content']
                if used_tokens + page['token'] <= single_max_token:
                    selected['text'].append(content)
                    used_tokens += page['token']
                    continue
                # Partial page: keep a character prefix proportional to the
                # remaining token budget, scaled down by 0.2.
                # NOTE(review): 0.2 looks like a deliberately conservative
                # token-to-character factor -- confirm the intent.
                use_rate = (
                    (single_max_token - used_tokens) / page['token']) * 0.2
                selected['text'].append(
                    content[:int(len(content) * use_rate)])
                break

            _ref_list.append(selected)
        return _ref_list