H committed on
Commit
9ae711a
·
1 Parent(s): aafe599

Add component Wikipedia (#1513)

Browse files

### What problem does this PR solve?


### Type of change

- [x] New Feature (non-breaking change which adds functionality)

graph/component/__init__.py CHANGED
@@ -11,6 +11,7 @@ from .rewrite import RewriteQuestion, RewriteQuestionParam
11
  from .keyword import KeywordExtract, KeywordExtractParam
12
  from .baidu import Baidu, BaiduParam
13
  from .duckduckgo import DuckDuckGo, DuckDuckGoParam
 
14
 
15
 
16
  def component_class(class_name):
 
11
  from .keyword import KeywordExtract, KeywordExtractParam
12
  from .baidu import Baidu, BaiduParam
13
  from .duckduckgo import DuckDuckGo, DuckDuckGoParam
14
+ from .wikipedia import Wikipedia, WikipediaParam
15
 
16
 
17
  def component_class(class_name):
graph/component/wikipedia.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ import random
17
+ from abc import ABC
18
+ from functools import partial
19
+ import wikipedia
20
+ import pandas as pd
21
+ from graph.settings import DEBUG
22
+ from graph.component.base import ComponentBase, ComponentParamBase
23
+
24
+
25
class WikipediaParam(ComponentParamBase):
    """
    Parameters for the Wikipedia component.

    Attributes set here:
        top_n: number of search results requested from Wikipedia
            (passed as ``results=`` to the search call); defaults to 10.
    """

    def __init__(self):
        super().__init__()
        # Upper bound on the number of Wikipedia search hits to fetch.
        self.top_n = 10

    def check(self):
        """Validate ``top_n`` with the base class's positive-integer check."""
        self.check_positive_integer(self.top_n, "Top N")
36
+
37
+
38
class Wikipedia(ComponentBase, ABC):
    """
    Component that searches Wikipedia for the upstream text and returns the
    matching pages (link, title and summary) as a DataFrame.
    """
    component_name = "Wikipedia"

    def _run(self, history, **kwargs):
        """
        Search Wikipedia using the joined "content" of the component input.

        The input's "content" entries are joined with " - " to form the query.
        Up to ``self._param.top_n`` titles are requested; each title is then
        resolved to a page, and pages that fail to load are skipped.

        Returns:
            A pandas DataFrame with a single "content" column, one row per
            page, formatted as ``<a href="URL">TITLE</a> SUMMARY``. When the
            query is empty or no page could be loaded, the result of
            ``Wikipedia.be_output(...)`` with the empty-result value is
            returned instead.
        """
        # Function-scope import so the block does not depend on a change to
        # the file-level import list.
        import logging

        # WikipediaParam does not define ``no`` itself; fall back to an empty
        # string rather than raising AttributeError when the base parameter
        # class does not provide it either.
        empty_output = getattr(self._param, "no", "")

        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return Wikipedia.be_output(empty_output)

        wiki_res = []
        for wiki_key in wikipedia.search(ans, results=self._param.top_n):
            try:
                page = wikipedia.page(title=wiki_key, auto_suggest=False)
                wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
            except Exception as e:
                # Best effort: a single unresolved title (disambiguation,
                # missing page, transient network failure) must not abort the
                # whole search, but it should be visible in the logs.
                logging.getLogger(__name__).warning(
                    "Wikipedia: skipping page %r: %s", wiki_key, e)

        if not wiki_res:
            return Wikipedia.be_output(empty_output)

        df = pd.DataFrame(wiki_res)
        if DEBUG:
            print(df, ":::::::::::::::::::::::::::::::::")
        return df
graph/test/dsl_examples/keyword_wikipedia_and_generate.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "begin": {
4
+ "obj":{
5
+ "component_name": "Begin",
6
+ "params": {
7
+ "prologue": "Hi there!"
8
+ }
9
+ },
10
+ "downstream": ["answer:0"],
11
+ "upstream": []
12
+ },
13
+ "answer:0": {
14
+ "obj": {
15
+ "component_name": "Answer",
16
+ "params": {}
17
+ },
18
+ "downstream": ["keyword:0"],
19
+ "upstream": ["begin"]
20
+ },
21
+ "keyword:0": {
22
+ "obj": {
23
+ "component_name": "KeywordExtract",
24
+ "params": {
25
+ "llm_id": "deepseek-chat",
26
+ "prompt": "- Role: You're a question analyzer.\n - Requirements:\n - Summarize user's question, and give top %s important keyword/phrase.\n - Use comma as a delimiter to separate keywords/phrases.\n - Answer format: (in language of user's question)\n - keyword: ",
27
+ "temperature": 0.2,
28
+ "top_n": 1
29
+ }
30
+ },
31
+ "downstream": ["wikipedia:0"],
32
+ "upstream": ["answer:0"]
33
+ },
34
+ "wikipedia:0": {
35
+ "obj":{
36
+ "component_name": "Wikipedia",
37
+ "params": {
38
+ "top_n": 10
39
+ }
40
+ },
41
+ "downstream": ["generate:0"],
42
+ "upstream": ["keyword:0"]
43
+ },
44
+ "generate:0": {
45
+ "obj": {
46
+ "component_name": "Generate",
47
+ "params": {
48
+ "llm_id": "deepseek-chat",
49
+ "prompt": "You are an intelligent assistant. Please answer the question based on content from Wikipedia. When the answer from Wikipedia is incomplete, you need to output the URL link of the corresponding content as well. When all the content searched from Wikipedia is irrelevant to the question, your answer must include the sentence, \"The answer you are looking for is not found in the Wikipedia!\". Answers need to consider chat history.\n The content of Wikipedia is as follows:\n {input}\n The above is the content of Wikipedia.",
50
+ "temperature": 0.2
51
+ }
52
+ },
53
+ "downstream": ["answer:0"],
54
+ "upstream": ["wikipedia:0"]
55
+ }
56
+ },
57
+ "history": [],
58
+ "path": [],
59
+ "messages": [],
60
+ "reference": {},
61
+ "answer": []
62
+ }
requirements.txt CHANGED
@@ -148,4 +148,5 @@ mistralai==0.4.2
148
  boto3==1.34.140
149
  duckduckgo_search==6.1.9
150
  google-generativeai==0.7.2
151
- groq==0.9.0
 
 
148
  boto3==1.34.140
149
  duckduckgo_search==6.1.9
150
  google-generativeai==0.7.2
151
+ groq==0.9.0
152
+ wikipedia==1.4.0
requirements_arm.txt CHANGED
@@ -150,3 +150,4 @@ boto3==1.34.140
150
  duckduckgo_search==6.1.9
151
  google-generativeai==0.7.2
152
  groq==0.9.0
 
 
150
  duckduckgo_search==6.1.9
151
  google-generativeai==0.7.2
152
  groq==0.9.0
153
+ wikipedia==1.4.0
requirements_dev.txt CHANGED
@@ -134,4 +134,5 @@ mistralai==0.4.2
134
  boto3==1.34.140
135
  duckduckgo_search==6.1.9
136
  google-generativeai==0.7.2
137
- groq==0.9.0
 
 
134
  boto3==1.34.140
135
  duckduckgo_search==6.1.9
136
  google-generativeai==0.7.2
137
+ groq==0.9.0
138
+ wikipedia==1.4.0