Hasan Iqbal
committed on
Commit
•
0d800ab
1
Parent(s):
4ea47ac
Generalized the Config behaviour
Browse files- .gitignore +3 -0
- .python-version +0 -1
- configs/production.json +0 -19
- pre-requirements.txt +0 -1
- requirements.txt +25 -0
- src/openfactcheck/__init__.py +2 -0
- src/openfactcheck/app/app.py +1 -1
- src/openfactcheck/core/cli.py +1 -7
- src/openfactcheck/lib/config.py +221 -77
- src/openfactcheck/lib/errors.py +4 -1
- src/openfactcheck/solvers/{solvers.py → __init__.py} +0 -0
- src/openfactcheck/templates/__init__.py +0 -0
- {configs/solvers → src/openfactcheck/templates/solver_configs}/dummy.yaml +0 -0
- {configs/solvers → src/openfactcheck/templates/solver_configs}/factcheckgpt.yaml +0 -0
- {configs/solvers → src/openfactcheck/templates/solver_configs}/factool.yaml +0 -0
- {configs/solvers → src/openfactcheck/templates/solver_configs}/rarr.yaml +0 -0
.gitignore
CHANGED
@@ -18,6 +18,9 @@ configs/dev*.json
|
|
18 |
.venv/
|
19 |
__pycache__/
|
20 |
*.pyc
|
|
|
|
|
|
|
21 |
|
22 |
# Logs and Output
|
23 |
*.log
|
|
|
18 |
.venv/
|
19 |
__pycache__/
|
20 |
*.pyc
|
21 |
+
build/
|
22 |
+
dist/
|
23 |
+
*.egg-info/
|
24 |
|
25 |
# Logs and Output
|
26 |
*.log
|
.python-version
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
3.11.9
|
|
|
|
configs/production.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"retries": 3,
|
3 |
-
"pipeline": {
|
4 |
-
"claimprocessor": "factool_claimprocessor",
|
5 |
-
"retriever": "factool_retriever",
|
6 |
-
"verifier": "factool_verifier"
|
7 |
-
},
|
8 |
-
"solver_configs": [
|
9 |
-
"configs/solvers/factool.yaml",
|
10 |
-
"configs/solvers/factcheckgpt.yaml",
|
11 |
-
"configs/solvers/rarr.yaml"
|
12 |
-
],
|
13 |
-
"solver_paths": [
|
14 |
-
"src/openfactcheck/solvers/factcheckgpt",
|
15 |
-
"src/openfactcheck/solvers/webservice"
|
16 |
-
],
|
17 |
-
"output_path": "tmp/output",
|
18 |
-
"verbose": "INFO"
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pre-requirements.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
factool==0.1.3
|
|
|
|
requirements.txt
CHANGED
@@ -4,7 +4,9 @@ alabaster==0.7.16
|
|
4 |
altair==5.3.0
|
5 |
annotated-types==0.7.0
|
6 |
anyio==4.4.0
|
|
|
7 |
arrow==1.3.0
|
|
|
8 |
async-timeout==4.0.3
|
9 |
asyncio==3.4.3
|
10 |
attrs==23.2.0
|
@@ -21,11 +23,14 @@ certifi==2024.7.4
|
|
21 |
charset-normalizer==3.3.2
|
22 |
click==8.1.7
|
23 |
cloudpathlib==0.18.1
|
|
|
24 |
confection==0.1.5
|
25 |
contourpy==1.2.1
|
26 |
cycler==0.12.1
|
27 |
cymem==2.0.8
|
28 |
datasets==2.20.0
|
|
|
|
|
29 |
Deprecated==1.2.14
|
30 |
dill==0.3.8
|
31 |
distro==1.9.0
|
@@ -33,6 +38,8 @@ docutils==0.20.1
|
|
33 |
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
|
34 |
entrypoints==0.4
|
35 |
evaluate==0.4.2
|
|
|
|
|
36 |
fake-useragent==1.5.1
|
37 |
Faker==26.0.0
|
38 |
fastapi==0.96.0
|
@@ -51,11 +58,16 @@ httpx==0.27.0
|
|
51 |
huggingface-hub==0.24.2
|
52 |
idna==3.7
|
53 |
imagesize==1.4.1
|
|
|
|
|
|
|
54 |
Jinja2==3.1.4
|
55 |
joblib==1.4.2
|
56 |
jsonlines==4.0.0
|
57 |
jsonschema==4.23.0
|
58 |
jsonschema-specifications==2023.12.1
|
|
|
|
|
59 |
kiwisolver==1.4.5
|
60 |
langcodes==3.4.0
|
61 |
language_data==1.2.0
|
@@ -66,12 +78,14 @@ markdown-it-py==3.0.0
|
|
66 |
markdownlit==0.0.7
|
67 |
MarkupSafe==2.1.5
|
68 |
matplotlib==3.9.1
|
|
|
69 |
mdurl==0.1.2
|
70 |
more-itertools==10.3.0
|
71 |
mpmath==1.3.0
|
72 |
multidict==6.0.5
|
73 |
multiprocess==0.70.16
|
74 |
murmurhash==1.0.10
|
|
|
75 |
networkx==3.3
|
76 |
nltk==3.8.1
|
77 |
numpy==1.26.4
|
@@ -79,10 +93,17 @@ openai==1.37.0
|
|
79 |
outcome==1.3.0.post0
|
80 |
packaging==24.1
|
81 |
pandas==2.2.2
|
|
|
|
|
82 |
pillow==10.4.0
|
|
|
83 |
preshed==3.0.9
|
84 |
prometheus_client==0.20.0
|
|
|
85 |
protobuf==5.27.2
|
|
|
|
|
|
|
86 |
pyarrow==17.0.0
|
87 |
pyarrow-hotfix==0.6
|
88 |
pydantic==1.10.9
|
@@ -96,6 +117,7 @@ python-dateutil==2.9.0.post0
|
|
96 |
python-dotenv==1.0.1
|
97 |
pytz==2024.1
|
98 |
PyYAML==6.0
|
|
|
99 |
referencing==0.35.1
|
100 |
regex==2024.7.24
|
101 |
requests==2.32.3
|
@@ -130,6 +152,7 @@ sphinxcontrib-serializinghtml==1.1.10
|
|
130 |
srsly==2.4.8
|
131 |
st-annotated-text==4.0.1
|
132 |
st-theme==1.2.3
|
|
|
133 |
starlette==0.27.0
|
134 |
streamlit==1.36.0
|
135 |
streamlit-camera-input-live==0.2.0
|
@@ -152,6 +175,7 @@ toolz==0.12.1
|
|
152 |
torch==2.4.0
|
153 |
tornado==6.4.1
|
154 |
tqdm==4.66.4
|
|
|
155 |
transformers==4.43.2
|
156 |
trio==0.26.0
|
157 |
trio-websocket==0.11.1
|
@@ -163,6 +187,7 @@ urllib3==2.2.2
|
|
163 |
uvicorn==0.22.0
|
164 |
validators==0.33.0
|
165 |
wasabi==1.1.3
|
|
|
166 |
weasel==0.4.1
|
167 |
websocket-client==1.8.0
|
168 |
wrapt==1.16.0
|
|
|
4 |
altair==5.3.0
|
5 |
annotated-types==0.7.0
|
6 |
anyio==4.4.0
|
7 |
+
appnope==0.1.4
|
8 |
arrow==1.3.0
|
9 |
+
asttokens==2.4.1
|
10 |
async-timeout==4.0.3
|
11 |
asyncio==3.4.3
|
12 |
attrs==23.2.0
|
|
|
23 |
charset-normalizer==3.3.2
|
24 |
click==8.1.7
|
25 |
cloudpathlib==0.18.1
|
26 |
+
comm==0.2.2
|
27 |
confection==0.1.5
|
28 |
contourpy==1.2.1
|
29 |
cycler==0.12.1
|
30 |
cymem==2.0.8
|
31 |
datasets==2.20.0
|
32 |
+
debugpy==1.8.2
|
33 |
+
decorator==5.1.1
|
34 |
Deprecated==1.2.14
|
35 |
dill==0.3.8
|
36 |
distro==1.9.0
|
|
|
38 |
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
|
39 |
entrypoints==0.4
|
40 |
evaluate==0.4.2
|
41 |
+
executing==2.0.1
|
42 |
+
factool @ git+https://github.com/hasaniqbal777/factool@2ae7f41242b6aff0c686314054f1dfea7e44b9d0
|
43 |
fake-useragent==1.5.1
|
44 |
Faker==26.0.0
|
45 |
fastapi==0.96.0
|
|
|
58 |
huggingface-hub==0.24.2
|
59 |
idna==3.7
|
60 |
imagesize==1.4.1
|
61 |
+
ipykernel==6.29.5
|
62 |
+
ipython==8.26.0
|
63 |
+
jedi==0.19.1
|
64 |
Jinja2==3.1.4
|
65 |
joblib==1.4.2
|
66 |
jsonlines==4.0.0
|
67 |
jsonschema==4.23.0
|
68 |
jsonschema-specifications==2023.12.1
|
69 |
+
jupyter_client==8.6.2
|
70 |
+
jupyter_core==5.7.2
|
71 |
kiwisolver==1.4.5
|
72 |
langcodes==3.4.0
|
73 |
language_data==1.2.0
|
|
|
78 |
markdownlit==0.0.7
|
79 |
MarkupSafe==2.1.5
|
80 |
matplotlib==3.9.1
|
81 |
+
matplotlib-inline==0.1.7
|
82 |
mdurl==0.1.2
|
83 |
more-itertools==10.3.0
|
84 |
mpmath==1.3.0
|
85 |
multidict==6.0.5
|
86 |
multiprocess==0.70.16
|
87 |
murmurhash==1.0.10
|
88 |
+
nest-asyncio==1.6.0
|
89 |
networkx==3.3
|
90 |
nltk==3.8.1
|
91 |
numpy==1.26.4
|
|
|
93 |
outcome==1.3.0.post0
|
94 |
packaging==24.1
|
95 |
pandas==2.2.2
|
96 |
+
parso==0.8.4
|
97 |
+
pexpect==4.9.0
|
98 |
pillow==10.4.0
|
99 |
+
platformdirs==4.2.2
|
100 |
preshed==3.0.9
|
101 |
prometheus_client==0.20.0
|
102 |
+
prompt_toolkit==3.0.47
|
103 |
protobuf==5.27.2
|
104 |
+
psutil==6.0.0
|
105 |
+
ptyprocess==0.7.0
|
106 |
+
pure_eval==0.2.3
|
107 |
pyarrow==17.0.0
|
108 |
pyarrow-hotfix==0.6
|
109 |
pydantic==1.10.9
|
|
|
117 |
python-dotenv==1.0.1
|
118 |
pytz==2024.1
|
119 |
PyYAML==6.0
|
120 |
+
pyzmq==26.0.3
|
121 |
referencing==0.35.1
|
122 |
regex==2024.7.24
|
123 |
requests==2.32.3
|
|
|
152 |
srsly==2.4.8
|
153 |
st-annotated-text==4.0.1
|
154 |
st-theme==1.2.3
|
155 |
+
stack-data==0.6.3
|
156 |
starlette==0.27.0
|
157 |
streamlit==1.36.0
|
158 |
streamlit-camera-input-live==0.2.0
|
|
|
175 |
torch==2.4.0
|
176 |
tornado==6.4.1
|
177 |
tqdm==4.66.4
|
178 |
+
traitlets==5.14.3
|
179 |
transformers==4.43.2
|
180 |
trio==0.26.0
|
181 |
trio-websocket==0.11.1
|
|
|
187 |
uvicorn==0.22.0
|
188 |
validators==0.33.0
|
189 |
wasabi==1.1.3
|
190 |
+
wcwidth==0.2.13
|
191 |
weasel==0.4.1
|
192 |
websocket-client==1.8.0
|
193 |
wrapt==1.16.0
|
src/openfactcheck/__init__.py
CHANGED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from . import templates
|
2 |
+
from . import solvers
|
src/openfactcheck/app/app.py
CHANGED
@@ -60,4 +60,4 @@ if __name__ == "__main__":
|
|
60 |
args = parse_args()
|
61 |
|
62 |
app = App()
|
63 |
-
app.run(
|
|
|
60 |
args = parse_args()
|
61 |
|
62 |
app = App()
|
63 |
+
app.run(args.config_path)
|
src/openfactcheck/core/cli.py
CHANGED
@@ -19,10 +19,4 @@ def parse_args():
|
|
19 |
if __name__ == "__main__":
|
20 |
args = parse_args()
|
21 |
|
22 |
-
ofc = OpenFactCheck(OpenFactCheckConfig(args.config_path))
|
23 |
-
|
24 |
-
#result = ofc("Pakistan is a country in Asia")
|
25 |
-
|
26 |
-
#print(result)
|
27 |
-
|
28 |
-
|
|
|
19 |
if __name__ == "__main__":
|
20 |
args = parse_args()
|
21 |
|
22 |
+
ofc = OpenFactCheck(OpenFactCheckConfig(args.config_path))
|
|
|
|
|
|
|
|
|
|
|
|
src/openfactcheck/lib/config.py
CHANGED
@@ -6,12 +6,60 @@ import transformers
|
|
6 |
from pathlib import Path
|
7 |
from typing import Union
|
8 |
from collections import namedtuple
|
|
|
9 |
|
10 |
-
from .logger import logger, set_logger_level
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
class OpenFactCheckConfig:
|
13 |
"""
|
14 |
Class to load the OpenFactCheck configuration from a JSON or YAML file.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
"""
|
16 |
def __init__(self, filename: Union[str, Path] = "config.json"):
|
17 |
# Setup Logger
|
@@ -27,6 +75,7 @@ class OpenFactCheckConfig:
|
|
27 |
"verifier"])
|
28 |
|
29 |
# Define Attributes
|
|
|
30 |
self.retries = 0
|
31 |
self.pipeline = None
|
32 |
self.solver_configs = None
|
@@ -36,80 +85,103 @@ class OpenFactCheckConfig:
|
|
36 |
self.verbose = ""
|
37 |
|
38 |
try:
|
39 |
-
#
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
# Initialize Solver Paths
|
58 |
-
if 'solver_paths' in config:
|
59 |
-
self.solver_paths = config['solver_paths']
|
60 |
-
else:
|
61 |
-
self.logger.warning("Solver paths missing or incomplete in the configuration file.")
|
62 |
-
self.solver_paths = None
|
63 |
-
|
64 |
-
# Initialize Output Path
|
65 |
-
if 'output_path' in config:
|
66 |
-
self.output_path = config['output_path']
|
67 |
-
os.makedirs(self.output_path, exist_ok=True)
|
68 |
-
else:
|
69 |
-
self.logger.warning("Output path missing or incomplete in the configuration file. Using default path.")
|
70 |
-
self.output_path = "tmp/output"
|
71 |
-
os.makedirs(self.output_path, exist_ok=True)
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
except FileNotFoundError:
|
115 |
self.logger.error(f"Config file not found: {self.filename}")
|
@@ -119,10 +191,77 @@ class OpenFactCheckConfig:
|
|
119 |
self.logger.error(f"Invalid JSON in config file: {self.filename}")
|
120 |
raise ValueError(f"Invalid JSON in config file: {self.filename}")
|
121 |
|
|
|
|
|
|
|
|
|
122 |
except Exception as e:
|
123 |
self.logger.error(f"Unexpected error loading config file: {e}")
|
124 |
raise Exception(f"Unexpected error loading config file: {e}")
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
class SolversConfig:
|
127 |
"""
|
128 |
Class to load the solvers configuration from one or more JSON or YAML files.
|
@@ -140,10 +279,10 @@ class SolversConfig:
|
|
140 |
|
141 |
try:
|
142 |
if isinstance(self.filename_s, (str, Path)):
|
143 |
-
self.
|
144 |
elif isinstance(self.filename_s, list):
|
145 |
for filename in self.filename_s:
|
146 |
-
self.
|
147 |
else:
|
148 |
self.logger.error(f"Invalid filename type: {type(self.filename_s)}")
|
149 |
raise ValueError(f"Invalid filename type: {type(self.filename_s)}")
|
@@ -158,7 +297,7 @@ class SolversConfig:
|
|
158 |
self.logger.error(f"Unexpected error loading solvers file: {e}")
|
159 |
raise Exception(f"Unexpected error loading solvers file: {e}")
|
160 |
|
161 |
-
def
|
162 |
with open(filename, encoding="utf-8") as file:
|
163 |
if filename.endswith(".yaml"):
|
164 |
file_data = yaml.load(file, Loader=yaml.FullLoader)
|
@@ -170,7 +309,12 @@ class SolversConfig:
|
|
170 |
|
171 |
# Merge current file data into existing solvers dictionary
|
172 |
self.solvers.update(file_data)
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
def __call__(self):
|
176 |
return self.solvers
|
|
|
6 |
from pathlib import Path
|
7 |
from typing import Union
|
8 |
from collections import namedtuple
|
9 |
+
from importlib import resources as pkg_resources
|
10 |
|
11 |
+
from openfactcheck.lib.logger import logger, set_logger_level
|
12 |
+
from openfactcheck.lib.errors import ConfigValidationError
|
13 |
+
from openfactcheck import templates as solver_config_templates_dir
|
14 |
+
from openfactcheck import solvers as solver_templates_dir
|
15 |
+
|
16 |
+
# Import solver configuration templates
|
17 |
+
solver_config_templates_path = pkg_resources.files(solver_config_templates_dir) / 'solver_configs'
|
18 |
+
with solver_config_templates_path as solver_config_templates_dir_path:
|
19 |
+
solver_config_template_files = [str(f) for f in solver_config_templates_dir_path.iterdir()]
|
20 |
+
|
21 |
+
# Import default solvers
|
22 |
+
# TODO: Currently, only webservice solvers are supported as default solvers
|
23 |
+
solver_templates_paths = [
|
24 |
+
str(pkg_resources.files(solver_templates_dir) / 'webservice')
|
25 |
+
]
|
26 |
|
27 |
class OpenFactCheckConfig:
|
28 |
"""
|
29 |
Class to load the OpenFactCheck configuration from a JSON or YAML file.
|
30 |
+
|
31 |
+
Parameters
|
32 |
+
----------
|
33 |
+
filename: str, Path
|
34 |
+
The path to the configuration file.
|
35 |
+
|
36 |
+
Attributes
|
37 |
+
----------
|
38 |
+
retries: int
|
39 |
+
Number of retries for the pipeline components.
|
40 |
+
pipeline: namedtuple
|
41 |
+
Namedtuple containing the pipeline components.
|
42 |
+
solver_configs: dict
|
43 |
+
Dictionary containing the solver configurations.
|
44 |
+
solver_paths: list
|
45 |
+
List of paths to the solver directories (bundled defaults followed by user-defined paths).
|
46 |
+
output_path: str
|
47 |
+
Path to the output directory.
|
48 |
+
secrets: namedtuple
|
49 |
+
Namedtuple containing the API keys.
|
50 |
+
verbose: str
|
51 |
+
The verbosity level for the logger.
|
52 |
+
|
53 |
+
Methods
|
54 |
+
-------
|
55 |
+
solver_configuration(solver: str = None) -> dict:
|
56 |
+
Get the solver configuration for a specific solver or all solvers.
|
57 |
+
validate():
|
58 |
+
Validate the configuration file
|
59 |
+
|
60 |
+
Examples
|
61 |
+
--------
|
62 |
+
>>> config = OpenFactCheckConfig("config.json")
|
63 |
"""
|
64 |
def __init__(self, filename: Union[str, Path] = "config.json"):
|
65 |
# Setup Logger
|
|
|
75 |
"verifier"])
|
76 |
|
77 |
# Define Attributes
|
78 |
+
self.config = None
|
79 |
self.retries = 0
|
80 |
self.pipeline = None
|
81 |
self.solver_configs = None
|
|
|
85 |
self.verbose = ""
|
86 |
|
87 |
try:
|
88 |
+
# Check if the file exists
|
89 |
+
if Path(self.filename).exists():
|
90 |
+
# Loading Config File
|
91 |
+
with open(self.filename, encoding="utf-8") as file:
|
92 |
+
self.config = json.load(file)
|
93 |
+
self.logger.info(f"Config file loaded successfully from {self.filename}")
|
94 |
+
else:
|
95 |
+
# Fall back to an empty configuration; defaults are applied below
|
96 |
+
self.logger.warning(f"Config file not found: {self.filename}")
|
97 |
+
self.config = {}
|
98 |
+
|
99 |
+
# Initialize Retries
|
100 |
+
if 'retries' in self.config:
|
101 |
+
self.retries = self.config['retries']
|
102 |
+
else:
|
103 |
+
self.logger.warning("No retries found in the configuration file. Using default value of 3.")
|
104 |
+
self.retries = 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
+
# Initialize template solvers along with the user-defined solvers
|
107 |
+
# User defined solvers will override the template solvers
|
108 |
+
if 'solver_configs' in self.config:
|
109 |
+
self.solver_configs = SolversConfig(solver_config_template_files + self.config['solver_configs'])()
|
110 |
+
else:
|
111 |
+
self.logger.warning("No solver configurations found in the configuration file. Using default templates only.")
|
112 |
+
self.solver_configs = SolversConfig(solver_config_template_files)()
|
113 |
+
|
114 |
+
# Initialize template solver paths along with the user-defined solver paths
|
115 |
+
if 'solver_paths' in self.config:
|
116 |
+
self.solver_paths = solver_templates_paths + self.config['solver_paths']
|
117 |
+
else:
|
118 |
+
self.logger.warning("No solver paths found in the configuration file. Using default solver paths only.")
|
119 |
+
self.solver_paths = solver_templates_paths
|
120 |
+
|
121 |
+
# Initialize Output Path
|
122 |
+
if 'output_path' in self.config:
|
123 |
+
self.output_path = self.config['output_path']
|
124 |
+
os.makedirs(self.output_path, exist_ok=True)
|
125 |
+
else:
|
126 |
+
self.logger.warning("No output path found in the configuration file. Using default output path 'tmp/output'.")
|
127 |
+
self.output_path = "tmp/output"
|
128 |
+
os.makedirs(self.output_path, exist_ok=True)
|
129 |
|
130 |
+
# Initialize Pipeline config
|
131 |
+
if 'pipeline' in self.config:
|
132 |
+
self.pipeline = Pipeline(claimprocessor=self.config['pipeline']['claimprocessor'],
|
133 |
+
retriever=self.config['pipeline']['retriever'],
|
134 |
+
verifier=self.config['pipeline']['verifier'])
|
135 |
+
else:
|
136 |
+
if self.solver_configs:
|
137 |
+
solvers = list(self.solver_configs.keys())
|
138 |
+
claimprocessor = None
|
139 |
+
retriever = None
|
140 |
+
verifier = None
|
141 |
+
for solver in solvers:
|
142 |
+
if 'claimprocessor' in solver:
|
143 |
+
claimprocessor = solver
|
144 |
+
if 'retriever' in solver:
|
145 |
+
retriever = solver
|
146 |
+
if 'verifier' in solver:
|
147 |
+
verifier = solver
|
148 |
+
if claimprocessor and retriever and verifier:
|
149 |
+
break
|
150 |
+
self.pipeline = Pipeline(claimprocessor=claimprocessor, retriever=retriever, verifier=verifier)
|
151 |
+
self.logger.warning(f"No pipeline found in the configuration file. Using first solver as default pipeline. ClaimProcessor: {claimprocessor}, Retriever: {retriever}, Verifier: {verifier}")
|
152 |
+
|
153 |
+
# Initialize Secrets config
|
154 |
+
if 'secrets' in self.config:
|
155 |
+
self.secrets = Secrets(openai_api_key=self.config['secrets']['openai_api_key'],
|
156 |
+
serper_api_key=self.config['secrets']['serper_api_key'],
|
157 |
+
azure_search_key=self.config['secrets']['azure_search_key'])
|
158 |
+
else:
|
159 |
+
self.logger.warning("No secrets found in the configuration file. Make sure to set the environment variables.")
|
160 |
+
self.secrets = Secrets(openai_api_key=None, serper_api_key=None, azure_search_key=None)
|
161 |
+
|
162 |
+
# Initialize Environment Variables
|
163 |
+
if self.secrets.openai_api_key:
|
164 |
+
os.environ['OPENAI_API_KEY'] = self.secrets.openai_api_key
|
165 |
+
if self.secrets.serper_api_key:
|
166 |
+
os.environ['SERPER_API_KEY'] = self.secrets.serper_api_key
|
167 |
+
if self.secrets.azure_search_key:
|
168 |
+
os.environ['AZURE_SEARCH_KEY'] = self.secrets.azure_search_key
|
169 |
+
|
170 |
+
# Initialize Verbose
|
171 |
+
if 'verbose' in self.config:
|
172 |
+
self.verbose = self.config['verbose']
|
173 |
+
set_logger_level(self.logger, self.verbose)
|
174 |
+
else:
|
175 |
+
self.logger.warning("No verbose level found in the configuration file. Using default level 'INFO'.")
|
176 |
+
self.verbose = "INFO"
|
177 |
+
set_logger_level(self.logger, "INFO")
|
178 |
+
|
179 |
+
# Validate Configuration
|
180 |
+
self.validate()
|
181 |
+
|
182 |
+
# Disable Transformers and Datasets logging
|
183 |
+
transformers.logging.set_verbosity_error()
|
184 |
+
datasets.logging.set_verbosity_error()
|
185 |
|
186 |
except FileNotFoundError:
|
187 |
self.logger.error(f"Config file not found: {self.filename}")
|
|
|
191 |
self.logger.error(f"Invalid JSON in config file: {self.filename}")
|
192 |
raise ValueError(f"Invalid JSON in config file: {self.filename}")
|
193 |
|
194 |
+
except ConfigValidationError as e:
|
195 |
+
self.logger.error(f"Configuration validation failed: {e}")
|
196 |
+
raise ConfigValidationError(f"Configuration validation failed: {e}")
|
197 |
+
|
198 |
except Exception as e:
|
199 |
self.logger.error(f"Unexpected error loading config file: {e}")
|
200 |
raise Exception(f"Unexpected error loading config file: {e}")
|
201 |
|
202 |
+
def validate(self):
|
203 |
+
"""
|
204 |
+
Validate the configuration file.
|
205 |
+
|
206 |
+
Raises
|
207 |
+
------
|
208 |
+
ConfigValidationError
|
209 |
+
If OPENAI_API_KEY, SERPER_API_KEY, or AZURE_SEARCH_KEY is missing from the environment.
|
210 |
+
|
211 |
+
Examples
|
212 |
+
--------
|
213 |
+
>>> config = OpenFactCheckConfig("config.json")
|
214 |
+
>>> config.validate()
|
215 |
+
"""
|
216 |
+
# Check for environment variables
|
217 |
+
if 'OPENAI_API_KEY' not in os.environ:
|
218 |
+
self.logger.warning("OPENAI_API_KEY environment variable not found.")
|
219 |
+
raise ConfigValidationError("OPENAI_API_KEY environment variable not found.")
|
220 |
+
if 'SERPER_API_KEY' not in os.environ:
|
221 |
+
self.logger.warning("SERPER_API_KEY environment variable not found.")
|
222 |
+
raise ConfigValidationError("SERPER_API_KEY environment variable not found.")
|
223 |
+
if 'AZURE_SEARCH_KEY' not in os.environ:
|
224 |
+
self.logger.warning("AZURE_SEARCH_KEY environment variable not found.")
|
225 |
+
raise ConfigValidationError("AZURE_SEARCH_KEY environment variable not found.")
|
226 |
+
|
227 |
+
|
228 |
+
|
229 |
+
def solver_configuration(self, solver: str = None) -> dict:
|
230 |
+
"""
|
231 |
+
Get the solver configuration for a specific solver or all solvers.
|
232 |
+
|
233 |
+
Parameters
|
234 |
+
----------
|
235 |
+
solver: str
|
236 |
+
The name of the solver to get the configuration for.
|
237 |
+
If not provided, returns the configuration for all solvers.
|
238 |
+
|
239 |
+
Returns
|
240 |
+
-------
|
241 |
+
dict
|
242 |
+
The configuration for the specified solver or all solvers.
|
243 |
+
|
244 |
+
Raises
|
245 |
+
------
|
246 |
+
ValueError
|
247 |
+
If the requested solver is not found in the loaded solver configurations.
|
248 |
+
|
249 |
+
Examples
|
250 |
+
--------
|
251 |
+
>>> config = OpenFactCheckConfig("config.json")
|
252 |
+
>>> config.solver_configuration()
|
253 |
+
>>> config.solver_configuration("factcheckgpt_claimprocessor")
|
254 |
+
"""
|
255 |
+
if solver:
|
256 |
+
if solver in self.solver_configs:
|
257 |
+
return self.solver_configs[solver]
|
258 |
+
else:
|
259 |
+
self.logger.error(f"Solver not found: {solver}")
|
260 |
+
raise ValueError(f"Solver not found: {solver}")
|
261 |
+
else:
|
262 |
+
return self.solver_configs
|
263 |
+
|
264 |
+
|
265 |
class SolversConfig:
|
266 |
"""
|
267 |
Class to load the solvers configuration from one or more JSON or YAML files.
|
|
|
279 |
|
280 |
try:
|
281 |
if isinstance(self.filename_s, (str, Path)):
|
282 |
+
self.load_config(self.filename_s)
|
283 |
elif isinstance(self.filename_s, list):
|
284 |
for filename in self.filename_s:
|
285 |
+
self.load_config(filename)
|
286 |
else:
|
287 |
self.logger.error(f"Invalid filename type: {type(self.filename_s)}")
|
288 |
raise ValueError(f"Invalid filename type: {type(self.filename_s)}")
|
|
|
297 |
self.logger.error(f"Unexpected error loading solvers file: {e}")
|
298 |
raise Exception(f"Unexpected error loading solvers file: {e}")
|
299 |
|
300 |
+
def load_config(self, filename: Union[str, Path]):
|
301 |
with open(filename, encoding="utf-8") as file:
|
302 |
if filename.endswith(".yaml"):
|
303 |
file_data = yaml.load(file, Loader=yaml.FullLoader)
|
|
|
309 |
|
310 |
# Merge current file data into existing solvers dictionary
|
311 |
self.solvers.update(file_data)
|
312 |
+
|
313 |
+
# Log the loaded configuration pattern
|
314 |
+
if 'template' in filename:
|
315 |
+
self.logger.info(f"Template solver configuration loaded: {filename.split('/')[-1]}")
|
316 |
+
else:
|
317 |
+
self.logger.info(f"User-defined solver configuration loaded from: {filename}")
|
318 |
|
319 |
def __call__(self):
|
320 |
return self.solvers
|
src/openfactcheck/lib/errors.py
CHANGED
@@ -2,4 +2,7 @@ class Error(Exception):
|
|
2 |
"""Base class for other exceptions"""
|
3 |
|
4 |
class ConfigError(Error):
|
5 |
-
"""Raised when there is an error with the configurations"""
|
|
|
|
|
|
|
|
2 |
"""Base class for other exceptions"""
|
3 |
|
4 |
class ConfigError(Error):
|
5 |
+
"""Raised when there is an error with the configurations"""
|
6 |
+
|
7 |
+
class ConfigValidationError(Error):
|
8 |
+
"""Raised when configuration validation fails"""
|
src/openfactcheck/solvers/{solvers.py → __init__.py}
RENAMED
File without changes
|
src/openfactcheck/templates/__init__.py
ADDED
File without changes
|
{configs/solvers → src/openfactcheck/templates/solver_configs}/dummy.yaml
RENAMED
File without changes
|
{configs/solvers → src/openfactcheck/templates/solver_configs}/factcheckgpt.yaml
RENAMED
File without changes
|
{configs/solvers → src/openfactcheck/templates/solver_configs}/factool.yaml
RENAMED
File without changes
|
{configs/solvers → src/openfactcheck/templates/solver_configs}/rarr.yaml
RENAMED
File without changes
|