mmmay0722 committed on
Commit
aeabbe4
·
1 Parent(s): c241ebb

copy webqa-agent file

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/md-link-config.json +26 -0
  2. .github/workflows/build-and-push.yml +135 -0
  3. .pre-commit-config.yaml +90 -0
  4. Dockerfile +23 -5
  5. LICENSE +201 -0
  6. README.md +0 -18
  7. README_zh-CN.md +175 -0
  8. config/config.yaml.example +35 -0
  9. docs/images/webqa.svg +0 -0
  10. package.json +19 -0
  11. requirements.txt +15 -0
  12. tests/conftest.py +16 -0
  13. tests/mocks/action_mocks.json +576 -0
  14. tests/mocks/actions_negative_mocks.json +89 -0
  15. tests/test_action_executor.py +300 -0
  16. tests/test_crawler.py +299 -0
  17. tests/test_loading_animation.py +7 -0
  18. tests/test_pages/dropdown_components.html +194 -0
  19. webqa-agent.py +406 -0
  20. webqa_agent/__init__.py +0 -0
  21. webqa_agent/actions/__init__.py +0 -0
  22. webqa_agent/actions/action_executor.py +338 -0
  23. webqa_agent/actions/action_handler.py +1431 -0
  24. webqa_agent/actions/click_handler.py +339 -0
  25. webqa_agent/actions/scroll_handler.py +365 -0
  26. webqa_agent/browser/check.py +300 -0
  27. webqa_agent/browser/config.py +17 -0
  28. webqa_agent/browser/driver.py +137 -0
  29. webqa_agent/browser/session.py +195 -0
  30. webqa_agent/crawler/__init__.py +0 -0
  31. webqa_agent/crawler/crawl.py +97 -0
  32. webqa_agent/crawler/deep_crawler.py +519 -0
  33. webqa_agent/crawler/dom_cacher.py +94 -0
  34. webqa_agent/crawler/dom_tree.py +353 -0
  35. webqa_agent/crawler/js/element_detector.js +1030 -0
  36. webqa_agent/crawler/js/marker_remover.js +15 -0
  37. webqa_agent/crawler/js/text_extractor.js +182 -0
  38. webqa_agent/data/__init__.py +11 -0
  39. webqa_agent/data/test_structures.py +333 -0
  40. webqa_agent/executor/__init__.py +20 -0
  41. webqa_agent/executor/parallel_executor.py +354 -0
  42. webqa_agent/executor/parallel_mode.py +131 -0
  43. webqa_agent/executor/result_aggregator.py +366 -0
  44. webqa_agent/executor/test_runners.py +888 -0
  45. webqa_agent/llm/llm_api.py +135 -0
  46. webqa_agent/llm/prompt.py +745 -0
  47. webqa_agent/static/assets/index.js +0 -0
  48. webqa_agent/static/assets/index_en-US.js +0 -0
  49. webqa_agent/static/assets/style.css +1 -0
  50. webqa_agent/static/i18n/en-US.json +127 -0
.github/md-link-config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ignorePatterns": [
3
+ {
4
+ "pattern": "^http://localhost"
5
+ },
6
+ {
7
+ "pattern": "^http://0.0.0.0"
8
+ },
9
+ {
10
+ "pattern": "^https://github.com/user-attachments/assets/"
11
+ }
12
+ ],
13
+ "httpHeaders": [
14
+ {
15
+ "urls": ["https://github.com/", "https://guides.github.com/", "https://help.github.com/", "https://docs.github.com/"],
16
+ "headers": {
17
+ "Accept-Encoding": "zstd, br, gzip, deflate"
18
+ }
19
+ }
20
+ ],
21
+ "timeout": "20s",
22
+ "retryOn429": true,
23
+ "retryCount": 5,
24
+ "fallbackRetryDelay": "30s",
25
+ "aliveStatusCodes": [200, 206, 429]
26
+ }
.github/workflows/build-and-push.yml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Push Docker Images
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ env:
9
+ DOCKERHUB_USERNAME: mmmay0722
10
+ DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
11
+ REGISTRY: docker.io
12
+
13
+ jobs:
14
+ build-and-push:
15
+ runs-on: ubuntu-latest
16
+ timeout-minutes: 60
17
+
18
+ steps:
19
+ - name: Checkout code
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Set up Docker Buildx
23
+ uses: docker/setup-buildx-action@v3
24
+
25
+ - name: Log in to Docker Hub
26
+ uses: docker/login-action@v3
27
+ with:
28
+ registry: ${{ env.REGISTRY }}
29
+ username: ${{ env.DOCKERHUB_USERNAME }}
30
+ password: ${{ env.DOCKERHUB_TOKEN }}
31
+
32
+ - name: Extract tag name
33
+ id: extract_tag
34
+ run: |
35
+ if [[ $GITHUB_REF == refs/tags/* ]]; then
36
+ TAG_NAME=${GITHUB_REF#refs/tags/}
37
+ echo "is_tag=true" >> $GITHUB_OUTPUT
38
+ else
39
+ TAG_NAME="latest"
40
+ echo "is_tag=false" >> $GITHUB_OUTPUT
41
+ fi
42
+ echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
43
+ echo "Extracted tag: $TAG_NAME"
44
+
45
+ - name: Build and push Docker image
46
+ uses: docker/build-push-action@v5
47
+ timeout-minutes: 30
48
+ with:
49
+ context: .
50
+ file: ./Dockerfile
51
+ push: true
52
+ tags: |
53
+ ${{ env.REGISTRY }}/${{ env.DOCKERHUB_USERNAME }}/webqa-agent:${{ steps.extract_tag.outputs.tag_name }}
54
+ ${{ env.REGISTRY }}/${{ env.DOCKERHUB_USERNAME }}/webqa-agent:latest
55
+ cache-from: type=gha
56
+ cache-to: type=gha,mode=max
57
+ platforms: linux/amd64
58
+ provenance: false
59
+ sbom: false
60
+
61
+ create-release:
62
+ needs: build-and-push
63
+ runs-on: ubuntu-latest
64
+ if: startsWith(github.ref, 'refs/tags/v')
65
+ permissions:
66
+ contents: write
67
+ discussions: write
68
+
69
+ steps:
70
+ - name: Checkout code
71
+ uses: actions/checkout@v4
72
+ with:
73
+ fetch-depth: 0
74
+
75
+ - name: Extract tag name
76
+ id: extract_tag
77
+ run: |
78
+ TAG_NAME=${GITHUB_REF#refs/tags/}
79
+ echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
80
+ echo "Current tag: $TAG_NAME"
81
+
82
+ - name: Generate changelog
83
+ id: changelog
84
+ run: |
85
+ CURRENT_TAG=${{ steps.extract_tag.outputs.tag_name }}
86
+ PREVIOUS_TAG=$(git describe --tags --abbrev=0 HEAD~1 2>/dev/null || echo "")
87
+
88
+ echo "## 🚀 Release $CURRENT_TAG" > CHANGELOG.md
89
+ echo "" >> CHANGELOG.md
90
+ echo "### 📦 Docker Images" >> CHANGELOG.md
91
+ echo "- \`${{ env.DOCKERHUB_USERNAME }}/webqa-agent:$CURRENT_TAG\`" >> CHANGELOG.md
92
+ echo "- \`${{ env.DOCKERHUB_USERNAME }}/webqa-agent:latest\`" >> CHANGELOG.md
93
+ echo "" >> CHANGELOG.md
94
+
95
+ if [ -n "$PREVIOUS_TAG" ]; then
96
+ echo "### 📝 Changes since $PREVIOUS_TAG" >> CHANGELOG.md
97
+ git log --pretty=format:"- %s (%h)" $PREVIOUS_TAG..$CURRENT_TAG >> CHANGELOG.md
98
+ else
99
+ echo "### 📝 Initial Release" >> CHANGELOG.md
100
+ echo "This is the initial release of WebQA Agent." >> CHANGELOG.md
101
+ fi
102
+
103
+ - name: Create GitHub Release
104
+ uses: softprops/action-gh-release@v1
105
+ with:
106
+ tag_name: ${{ steps.extract_tag.outputs.tag_name }}
107
+ name: Release ${{ steps.extract_tag.outputs.tag_name }}
108
+ body_path: CHANGELOG.md
109
+ draft: false
110
+ prerelease: false
111
+ generate_release_notes: true
112
+ env:
113
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
114
+
115
+ notify:
116
+ needs: [build-and-push, create-release]
117
+ runs-on: ubuntu-latest
118
+ if: always()
119
+
120
+ steps:
121
+ - name: Notify build status
122
+ run: |
123
+ if [ "${{ needs.build-and-push.result }}" == "success" ]; then
124
+ echo "✅ Docker images built and pushed successfully!"
125
+ echo "✅ Both versioned tag and latest tag have been updated!"
126
+ else
127
+ echo "❌ Docker image build failed!"
128
+ exit 1
129
+ fi
130
+
131
+ if [ "${{ needs.create-release.result }}" == "success" ] || [ "${{ needs.create-release.result }}" == "skipped" ]; then
132
+ echo "✅ Release process completed successfully!"
133
+ else
134
+ echo "❌ Release process failed!"
135
+ fi
.pre-commit-config.yaml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ # - repo: https://github.com/psf/black
3
+ # rev: 25.1.0
4
+ # hooks:
5
+ # - id: black
6
+ # args: ["--line-length=120"]
7
+ - repo: https://github.com/PyCQA/flake8
8
+ rev: 7.2.0
9
+ hooks:
10
+ - id: flake8
11
+ args: [
12
+ "--select=E,F",
13
+ "--ignore=E741,E731,E722,E721,E501,E402,E266,E265,E262,E203,F403,F405",
14
+ "--exclude=**/__init__.py",
15
+ "--max-line-length=120"
16
+ ]
17
+ - repo: https://github.com/PyCQA/isort
18
+ rev: 6.0.1
19
+ hooks:
20
+ - id: isort
21
+ - repo: https://github.com/codespell-project/codespell
22
+ rev: v2.4.1
23
+ hooks:
24
+ - id: codespell
25
+ args: ["--skip=*.po,*.ts,*.css", "--ignore-regex=\\b\\w{2,3}\\b"]
26
+ - repo: https://github.com/pre-commit/pre-commit-hooks
27
+ rev: v4.3.0
28
+ hooks:
29
+ - id: trailing-whitespace
30
+ - id: check-yaml
31
+ - id: end-of-file-fixer
32
+ - id: requirements-txt-fixer
33
+ - id: double-quote-string-fixer
34
+ - id: check-merge-conflict
35
+ - id: fix-encoding-pragma
36
+ args: ["--remove"]
37
+ - id: mixed-line-ending
38
+ args: ["--fix=lf"]
39
+ - id: detect-private-key
40
+ - id: check-json
41
+ - repo: https://github.com/executablebooks/mdformat
42
+ rev: 0.7.9
43
+ hooks:
44
+ - id: mdformat
45
+ args: ["--number", "--table-width", "200"]
46
+ additional_dependencies:
47
+ - mdformat-openmmlab
48
+ - mdformat_frontmatter
49
+ - linkify-it-py
50
+ - repo: https://github.com/myint/docformatter
51
+ rev: v1.7.7
52
+ hooks:
53
+ - id: docformatter
54
+ language: python
55
+ args: ["--in-place", "--wrap-descriptions", "79"]
56
+ - repo: https://github.com/jackdewinter/pymarkdown
57
+ rev: v0.9.30
58
+ hooks:
59
+ - id: pymarkdown
60
+ args: [fix]
61
+ - repo: https://github.com/gitleaks/gitleaks
62
+ rev: v8.27.0
63
+ hooks:
64
+ - id: gitleaks
65
+ entry: "gitleaks dir"
66
+ args: [
67
+ "--verbose",
68
+ "--redact=50"
69
+ ]
70
+ - repo: https://github.com/PyCQA/pylint/
71
+ rev: v3.3.7
72
+ hooks:
73
+ - id: pylint
74
+ name: pylint
75
+ entry: pylint
76
+ language: system
77
+ types: [python]
78
+ require_serial: false
79
+ args:
80
+ [ "--jobs=4",
81
+ "--disable=all",
82
+ "--enable=E,F",
83
+ "--disable=E0401,E0402,E0102,E1101",
84
+ "-sn"
85
+ ]
86
+ - repo: https://github.com/tcort/markdown-link-check
87
+ rev: v3.12.2
88
+ hooks:
89
+ - id: markdown-link-check
90
+ args: [-q,-c, ./.github/md-link-config.json]
Dockerfile CHANGED
@@ -1,10 +1,28 @@
1
- FROM mmmay0722/webqa-agent:latest
2
 
 
3
  WORKDIR /app
4
 
5
- COPY app.py .
6
- COPY app_gradio/ ./app_gradio/
 
 
 
 
 
 
7
 
8
- EXPOSE 7860
 
 
 
9
 
10
- CMD ["/usr/bin/python3", "app.py"]
 
 
 
 
 
 
 
 
 
1
+ FROM mcr.microsoft.com/playwright/python:v1.52.0-noble
2
 
3
+ # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install Node.js, npm, and necessary tools
7
+ RUN apt-get update && apt-get install -y \
8
+ curl \
9
+ unzip \
10
+ wget \
11
+ && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
12
+ && apt-get install -y nodejs \
13
+ && rm -rf /var/lib/apt/lists/*
14
 
15
+ # Optimize pip configuration and network settings
16
+ RUN pip config set global.index-url https://pypi.org/simple && \
17
+ pip config set global.timeout 300 && \
18
+ pip config set global.retries 5
19
 
20
+ # Copy Python dependency file and install
21
+ COPY requirements.txt /app/
22
+ RUN pip install --no-cache-dir --default-timeout=300 -r requirements.txt
23
+
24
+ # Copy project files
25
+ COPY . /app
26
+
27
+ # Set to run app.py for Gradio interface
28
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,21 +1,3 @@
1
- ---
2
- title: WebQA Agent
3
- emoji: 🤖
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- tags:
10
- - web-testing
11
- - vibecoding
12
- - automation
13
- - qa
14
- - llm
15
- - gradio
16
- ---
17
-
18
-
19
  # WebQA Agent
20
 
21
  <!-- badges -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # WebQA Agent
2
 
3
  <!-- badges -->
README_zh-CN.md ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # WebQA Agent
2
+
3
+ <!-- badges -->
4
+ <p align="left">
5
+ <a href="https://github.com/MigoXLab/webqa-agent/blob/main/LICENSE"><img src="https://img.shields.io/github/license/MigoXLab/webqa-agent" alt="License"></a>
6
+ <a href="https://github.com/MigoXLab/webqa-agent/stargazers"><img src="https://img.shields.io/github/stars/MigoXLab/webqa-agent" alt="GitHub stars"></a>
7
+ <a href="https://github.com/MigoXLab/webqa-agent/network/members"><img src="https://img.shields.io/github/forks/MigoXLab/webqa-agent" alt="GitHub forks"></a>
8
+ <a href="https://github.com/MigoXLab/webqa-agent/issues"><img src="https://img.shields.io/github/issues/MigoXLab/webqa-agent" alt="GitHub issues"></a>
9
+ <a href="https://deepwiki.com/MigoXLab/webqa-agent"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
10
+ </p>
11
+
12
+ [English](README.md) · [简体中文](README_zh-CN.md)
13
+
14
+ **WebQA Agent** 是全自动网页评估测试 Agent,一键诊断性能、安全、功能与交互体验
15
+
16
+ ## 🚀 核心特性
17
+
18
+ ### 🧭 功能介绍
19
+
20
+ <p>
21
+ <img src="docs/images/webqa.svg" alt="WebQA Agent 业务功能图" />
22
+ </p>
23
+
24
+ ### 📋 特性概览
25
+
26
+ - **🤖 AI智能测试**:WebQA-Agent能够自主进行网站测试,从页面抓取、用例生成与执行,实现端到端功能测试自动化
27
+ - **📊 多维度评估**:覆盖功能、性能、用户体验、安全等核心测试场景,评估页面加载速度、设计细节和链接,全面保障系统质量
28
+ - **🎯 精准诊断**:基于真实浏览器环境的深度测试,提供可操作的优化建议
29
+ - **📈 可视化报告**:生成详细的HTML测试报告,多维度、可视化展示测试结果,便于分析与追踪
30
+
31
+ ## 📌 测试案例
32
+
33
+ <p align="center">
34
+ <img src="https://github.com/user-attachments/assets/b75f18bf-8b92-498e-b5e1-7c4dc5cd33f5" alt="AI 功能测试" width="45%"/>
35
+ &nbsp;
36
+ <img src="https://github.com/user-attachments/assets/560cd99d-1213-47b9-82dc-52d3f2d1c1e7" alt="其他测试" width="45%"/>
37
+ </p>
38
+
39
+ <p align="center">
40
+ <b>左:AI 智能测试 全自动执行流程</b> | <b>右:覆盖多类测试场景</b>
41
+ </p>
42
+
43
+ ## 安装与配置
44
+
45
+ ### 🚀 Docker一键启动
46
+
47
+ 在开始之前,请确保已安装 Docker。如未安装,请参考官方安装指南:[Docker 安装指南](https://docs.docker.com/get-started/get-docker/)。
48
+
49
+ ```bash
50
+ # 1. 下载配置文件模板
51
+ mkdir -p config && curl -fsSL https://raw.githubusercontent.com/MigoXLab/webqa-agent/main/config/config.yaml.example -o config/config.yaml
52
+
53
+ # 2. 编辑配置文件
54
+ # 设置 target.url、llm_config.api_key 等参数
55
+
56
+ # 3. 一键启动
57
+ curl -fsSL https://raw.githubusercontent.com/MigoXLab/webqa-agent/main/start.sh | bash
58
+ ```
59
+
60
+ ### 源码安装
61
+
62
+ ```bash
63
+ git clone https://github.com/MigoXLab/webqa-agent.git
64
+ cd webqa-agent
65
+ ```
66
+
67
+ 安装 Python >= 3.10,运行以下命令:
68
+
69
+ ```bash
70
+ pip install -r requirements.txt
71
+ playwright install
72
+
73
+ ```
74
+
75
+ 性能测试 - Lighthouse 安装(可选)
76
+
77
+ ```bash
78
+ # 需要 Node.js >= 18.0.0 package.json
79
+ npm install
80
+
81
+ ```
82
+
83
+ 安全测试 - Nuclei 安装(可选)
84
+
85
+ 下载地址: [Nuclei Releases](https://github.com/projectdiscovery/nuclei/releases/)
86
+
87
+ ```bash
88
+ # MacOS
89
+ brew install nuclei
90
+
91
+ # 其他系统请从上述下载地址获取对应架构的版本
92
+
93
+ # 安装后更新模板并验证
94
+ nuclei -ut -v # 更新 Nuclei 模板
95
+ nuclei -version # 验证安装成功
96
+
97
+ ```
98
+
99
+ 参考“使用说明 > 测试配置”进行 `config/config.yaml` 配置后,运行下方命令。
100
+
101
+ ```bash
102
+ python webqa-agent.py
103
+ ```
104
+
105
+ ## 在线演示
106
+
107
+ 进入ModelScope体验:[WebQA-Agent on ModelScope](https://modelscope.cn/studios/mmmmei22/WebQA-Agent/summary)
108
+
109
+ ## 使用说明
110
+
111
+ ### 测试配置
112
+
113
+ `webqa-agent` 通过 YAML 配置测试运行参数:
114
+
115
+ ```yaml
116
+ target:
117
+ url: https://example.com/ # 需要测试的网站URL
118
+ description: example description
119
+
120
+ test_config: # 测试项配置
121
+ function_test: # 功能测试
122
+ enabled: True
123
+ type: ai # default or ai
124
+ business_objectives: example business objectives # 建议加入测试范围,如:测试搜索功能
125
+ ux_test: # 用户体验测试
126
+ enabled: True
127
+ performance_test: # 性能测试
128
+ enabled: False
129
+ security_test: # 安全测试
130
+ enabled: False
131
+
132
+ llm_config: # 视觉模型配置,当前仅支持 OpenAI SDK 兼容格式
133
+ model: gpt-4.1 # 推荐使用
134
+ api_key: your_api_key
135
+ base_url: https://api.example.com/v1
136
+
137
+ browser_config:
138
+ viewport: {"width": 1280, "height": 720}
139
+ headless: False # Docker环境会自动覆盖为True
140
+ language: zh-CN
141
+ cookies: []
142
+ ```
143
+
144
+ 在配置和运行测试时,请注意以下重要事项:
145
+
146
+ #### 1. 功能测试说明
147
+
148
+ - **AI模式**:当在配置文件中指定生成测试用例的数量时,系统可能会根据实际测试情况进行代理重新规划和调整。这可能导致最终执行的测试用例数量与初始设定存在一定出入,以确保测试的准确性和有效性。
149
+
150
+ - **Default模式**:功能测试的 `default` 模式主要验证UI元素的点击行为是否成功执行,包括按钮点击、链接跳转等基本交互功能。
151
+
152
+ #### 2. 用户体验测试说明
153
+
154
+ UX(用户体验)测试专注于评估网站的交互设计、可用性和用户友好程度。测试结果中包含的模型输出内容是基于用户体验最佳实践提供的改进建议,供开发和设计团队参考优化。
155
+
156
+ ## 查看结果
157
+
158
+ 在 `reports` 目录会生成本次测试的文件夹,打开其中的 HTML 报告即可查看结果。
159
+
160
+ ## RoadMap
161
+
162
+ 1. AI功能测试持续优化:提升覆盖率与准确性
163
+ 2. 功能遍历与页面校验:校验业务逻辑正确性与数据完整性
164
+ 3. 交互与可视化:用例可视化与本地服务实时展示推理过程
165
+ 4. 能力扩展:多模型接入与更多评估维度集成
166
+
167
+ ## 致谢
168
+
169
+ - [natbot](https://github.com/nat/natbot): 通过GPT-3驱动浏览器
170
+ - [Midscene.js](https://github.com/web-infra-dev/midscene/):Web、Android、自动化和测试的AI Operator
171
+ - [browser-use](https://github.com/browser-use/browser-use/):用于浏览器控制的AI Agent
172
+
173
+ ## 开源许可证
174
+
175
+ 该项目采用 [Apache 2.0 开源许可证](LICENSE)。
config/config.yaml.example ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target:
2
+ url: https://baidu.com
3
+ description: Test search functionality
4
+ # max_concurrent_tests: 2 # Optional, default parallel 2
5
+
6
+ test_config: # Test configuration
7
+ function_test:
8
+ enabled: True
9
+ type: ai # default or ai
10
+ business_objectives: Test Baidu search functionality, generate 3 test cases
11
+ ux_test:
12
+ enabled: True
13
+ performance_test:
14
+ enabled: False
15
+ security_test:
16
+ enabled: False
17
+
18
+ llm_config: # LLM configuration, currently only supports OpenAI SDK compatible format
19
+ model: gpt-4.1 # Recommended
20
+ api_key: your_api_key
21
+ base_url: https://api.example.com/v1
22
+ temperature: 0.1 # Optional, default 0.1
23
+ # top_p: 0.9 # Optional, if not set, this parameter will not be passed
24
+
25
+ browser_config:
26
+ viewport: {"width": 1280, "height": 720}
27
+ headless: False # Docker environment will automatically override to True
28
+ language: zh-CN
29
+ cookies: []
30
+
31
+ report:
32
+ language: en-US # zh-CN, en-US
33
+
34
+ log:
35
+ level: info
docs/images/webqa.svg ADDED
package.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "webqa-agent",
3
+ "version": "0.1.0",
4
+ "description": "WebQA Agent - 全自动网页评估测试",
5
+ "dependencies": {
6
+ "chrome-launcher": "^1.2.0",
7
+ "lighthouse": "^12.8.1"
8
+ },
9
+ "engines": {
10
+ "node": ">=18.0.0"
11
+ },
12
+ "main": "index.js",
13
+ "scripts": {
14
+ "test": "echo \"Error: no test specified\" && exit 1"
15
+ },
16
+ "keywords": [],
17
+ "author": "MigoXLab with ❤",
18
+ "license": "ISC"
19
+ }
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ html2text
2
+ jinja2
3
+ langchain
4
+ langchain-openai
5
+ langgraph
6
+ openai
7
+ playwright==1.52.0
8
+ pydantic
9
+ pytest
10
+ pytest-asyncio
11
+ python-dotenv
12
+ requests
13
+ pillow
14
+ gradio
15
+ pyyaml
tests/conftest.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+
4
+ def pytest_addoption(parser: pytest.Parser) -> None:
5
+ parser.addoption(
6
+ '--url',
7
+ action='store',
8
+ default=None,
9
+ help='Target URL for crawling tests (overrides default)',
10
+ )
11
+
12
+
13
+ @pytest.fixture
14
+ def test_url(request: pytest.FixtureRequest) -> str:
15
+ # Priority: CLI --url > env WEBQA_TEST_URL > default example.com
16
+ return request.config.getoption('--url') or 'https://google.com'
tests/mocks/action_mocks.json ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Tap": [
3
+ {
4
+ "url": "https://google.com",
5
+ "actions": [
6
+ {
7
+ "type": "Tap",
8
+ "locate": {"id": "11"}
9
+ }
10
+ ],
11
+ "id_map": {
12
+ "11": {
13
+ "node": "ref: <Node>",
14
+ "tagName": "a",
15
+ "className": "pHiOh",
16
+ "type": null,
17
+ "placeholder": null,
18
+ "innerText": "关于 Google",
19
+ "attributes": [
20
+ {
21
+ "name": "class",
22
+ "value": "pHiOh"
23
+ },
24
+ {
25
+ "name": "href",
26
+ "value": "https://about.google/?utm_source=google-ZZ&utm_medium=referral&utm_campaign=hp-footer&fg=1"
27
+ },
28
+ {
29
+ "name": "ping",
30
+ "value": "/url?sa=t&rct=j&source=webhp&url=https://about.google/%3Futm_source%3Dgoogle-ZZ%26utm_medium%3Dreferral%26utm_campaign%3Dhp-footer%26fg%3D1&ved=0ahUKEwiCiufHjYePAxXZEDQIHXHeNyEQkNQCCBQ&opi=89978449"
31
+ }
32
+ ],
33
+ "viewport": {
34
+ "x": 20,
35
+ "y": 670,
36
+ "width": 107.046875,
37
+ "height": 50
38
+ },
39
+ "center_x": 73.5234375,
40
+ "center_y": 695,
41
+ "isVisible": true,
42
+ "isInteractive": true,
43
+ "isValidText": false,
44
+ "isTopElement": true,
45
+ "isInViewport": true,
46
+ "isParentHighlighted": false,
47
+ "xpath": "/html[1]/body[1]/div[2]/div[7]/div[1]/div[1]/div[1]/a[1]",
48
+ "selector": "a.pHiOh",
49
+ "highlightIndex": 11
50
+ }
51
+ }
52
+ },
53
+ {
54
+ "url": "https://arxiv.org/search/",
55
+ "actions": [
56
+ {
57
+ "type": "Tap",
58
+ "locate": {"id": "18"}
59
+ },
60
+ {
61
+ "type": "Tap",
62
+ "locate": {"id": "14"}
63
+ }
64
+ ],
65
+ "id_map": {
66
+ "14": {
67
+ "node": "ref: <Node>",
68
+ "tagName": "button",
69
+ "className": "button is-link is-medium",
70
+ "type": null,
71
+ "placeholder": null,
72
+ "innerText": "Search",
73
+ "attributes": [
74
+ {
75
+ "name": "class",
76
+ "value": "button is-link is-medium"
77
+ }
78
+ ],
79
+ "viewport": {
80
+ "x": 1103.828125,
81
+ "y": 280.203125,
82
+ "width": 83.671875,
83
+ "height": 39.375
84
+ },
85
+ "center_x": 1145.6640625,
86
+ "center_y": 299.890625,
87
+ "isVisible": true,
88
+ "isInteractive": true,
89
+ "isValidText": false,
90
+ "isTopElement": true,
91
+ "isInViewport": true,
92
+ "isParentHighlighted": false,
93
+ "xpath": "/html[1]/body[1]/main[1]/div[2]/form[1]/div[1]/div[3]/button[1]",
94
+ "selector": "button.button.is-link.is-medium",
95
+ "highlightIndex": 14
96
+ },
97
+ "18": {
98
+ "node": "ref: <Node>",
99
+ "tagName": "input",
100
+ "className": null,
101
+ "type": "radio",
102
+ "placeholder": null,
103
+ "innerText": "hide",
104
+ "attributes": [
105
+ {
106
+ "name": "id",
107
+ "value": "abstracts-1"
108
+ },
109
+ {
110
+ "name": "name",
111
+ "value": "abstracts"
112
+ },
113
+ {
114
+ "name": "type",
115
+ "value": "radio"
116
+ },
117
+ {
118
+ "name": "value",
119
+ "value": "hide"
120
+ },
121
+ {
122
+ "name": "style",
123
+ "value": ""
124
+ }
125
+ ],
126
+ "viewport": {
127
+ "x": 186.8125,
128
+ "y": 330.078125,
129
+ "width": 13,
130
+ "height": 13
131
+ },
132
+ "center_x": 193.3125,
133
+ "center_y": 336.578125,
134
+ "isVisible": true,
135
+ "isInteractive": true,
136
+ "isValidText": false,
137
+ "isTopElement": true,
138
+ "isInViewport": true,
139
+ "isParentHighlighted": true,
140
+ "xpath": "//*[@id=\"abstracts-1\"]",
141
+ "selector": "input#abstracts-1",
142
+ "highlightIndex": 18
143
+ }
144
+ }
145
+ }
146
+ ],
147
+ "Hover": [
148
+ {
149
+ "url": "https://demo.chat-sdk.dev/",
150
+ "actions": [
151
+ {
152
+ "type": "Hover",
153
+ "locate": {"id": "6"}
154
+ }
155
+ ],
156
+ "id_map": {
157
+ "6": {
158
+ "node": "ref: <Node>",
159
+ "tagName": "button",
160
+ "className": "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 py-2 order-2 md:order-1 md:px-2 px-2 md:h-fit ml-auto md:ml-0",
161
+ "type": null,
162
+ "placeholder": null,
163
+ "innerText": "New Chat",
164
+ "attributes": [
165
+ {
166
+ "name": "class",
167
+ "value": "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 py-2 order-2 md:order-1 md:px-2 px-2 md:h-fit ml-auto md:ml-0"
168
+ },
169
+ {
170
+ "name": "data-state",
171
+ "value": "closed"
172
+ }
173
+ ],
174
+ "viewport": {
175
+ "x": 50,
176
+ "y": 6,
177
+ "width": 34,
178
+ "height": 34
179
+ },
180
+ "center_x": 67,
181
+ "center_y": 23,
182
+ "isVisible": true,
183
+ "isInteractive": true,
184
+ "isValidText": false,
185
+ "isTopElement": true,
186
+ "isInViewport": true,
187
+ "isParentHighlighted": false,
188
+ "xpath": "/html[1]/body[1]/div[1]/main[1]/div[1]/header[1]/button[2]",
189
+ "selector": "button.inline-flex.items-center.justify-center.gap-2.whitespace-nowrap.rounded-md.text-sm.font-medium.ring-offset-background.transition-colors.focus-visible:outline-none.focus-visible:ring-2.focus-visible:ring-ring.focus-visible:ring-offset-2.disabled:pointer-events-none.disabled:opacity-50.[&_svg]:pointer-events-none.[&_svg]:size-4.[&_svg]:shrink-0.border.border-input.bg-background.hover:bg-accent.hover:text-accent-foreground.h-10.py-2.order-2.md:order-1.md:px-2.px-2.md:h-fit.ml-auto.md:ml-0",
190
+ "highlightIndex": 6
191
+ }
192
+ }
193
+ }
194
+ ],
195
+ "Input": [
196
+ {
197
+ "url": "https://demo.chat-sdk.dev/",
198
+ "actions": [
199
+ {
200
+ "type": "Input",
201
+ "locate": {"id": "15"},
202
+ "param": {
203
+ "value": "test input",
204
+ "clear_before_type": true
205
+ }
206
+ }
207
+ ],
208
+ "id_map": {
209
+ "15": {
210
+ "node": "ref: <Node>",
211
+ "tagName": "textarea",
212
+ "className": "flex w-full border border-input px-3 py-2 text-base ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm min-h-[24px] max-h-[calc(75dvh)] overflow-hidden resize-none rounded-2xl !text-base bg-muted pb-10 dark:border-zinc-700",
213
+ "type": null,
214
+ "placeholder": "Send a message...",
215
+ "innerText": "",
216
+ "attributes": [
217
+ {
218
+ "name": "class",
219
+ "value": "flex w-full border border-input px-3 py-2 text-base ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm min-h-[24px] max-h-[calc(75dvh)] overflow-hidden resize-none rounded-2xl !text-base bg-muted pb-10 dark:border-zinc-700"
220
+ },
221
+ {
222
+ "name": "data-testid",
223
+ "value": "multimodal-input"
224
+ },
225
+ {
226
+ "name": "placeholder",
227
+ "value": "Send a message..."
228
+ },
229
+ {
230
+ "name": "rows",
231
+ "value": "2"
232
+ },
233
+ {
234
+ "name": "autofocus",
235
+ "value": ""
236
+ },
237
+ {
238
+ "name": "style",
239
+ "value": "height: 98px;"
240
+ }
241
+ ],
242
+ "viewport": {
243
+ "x": 272,
244
+ "y": 598,
245
+ "width": 736,
246
+ "height": 98
247
+ },
248
+ "center_x": 640,
249
+ "center_y": 647,
250
+ "isVisible": true,
251
+ "isInteractive": true,
252
+ "isValidText": false,
253
+ "isTopElement": true,
254
+ "isInViewport": true,
255
+ "isParentHighlighted": false,
256
+ "xpath": "/html[1]/body[1]/div[1]/main[1]/div[1]/form[1]/div[1]/textarea[1]",
257
+ "selector": "textarea.flex.w-full.border.border-input.px-3.py-2.text-base.ring-offset-background.placeholder:text-muted-foreground.focus-visible:outline-none.focus-visible:ring-2.focus-visible:ring-ring.focus-visible:ring-offset-2.disabled:cursor-not-allowed.disabled:opacity-50.md:text-sm.min-h-[24px].max-h-[calc(75dvh)].overflow-hidden.resize-none.rounded-2xl.!text-base.bg-muted.pb-10.dark:border-zinc-700",
258
+ "highlightIndex": 15
259
+ }
260
+ }
261
+ },
262
+ {
263
+ "url": "https://arxiv.org/search/",
264
+ "actions": [
265
+ {
266
+ "type": "Input",
267
+ "locate": {"id": "12"},
268
+ "param": {
269
+ "value": "test input",
270
+ "clear_before_type": true
271
+ }
272
+ }
273
+ ],
274
+ "id_map": {
275
+ "12": {
276
+ "node": "ref: <Node>",
277
+ "tagName": "input",
278
+ "className": "input is-medium",
279
+ "type": "text",
280
+ "placeholder": "Search term...",
281
+ "innerText": "",
282
+ "attributes": [
283
+ {
284
+ "name": "class",
285
+ "value": "input is-medium"
286
+ },
287
+ {
288
+ "name": "id",
289
+ "value": "query"
290
+ },
291
+ {
292
+ "name": "name",
293
+ "value": "query"
294
+ },
295
+ {
296
+ "name": "placeholder",
297
+ "value": "Search term..."
298
+ },
299
+ {
300
+ "name": "type",
301
+ "value": "text"
302
+ },
303
+ {
304
+ "name": "value",
305
+ "value": ""
306
+ },
307
+ {
308
+ "name": "style",
309
+ "value": ""
310
+ }
311
+ ],
312
+ "viewport": {
313
+ "x": 77.5,
314
+ "y": 280.203125,
315
+ "width": 825.640625,
316
+ "height": 39.375
317
+ },
318
+ "center_x": 490.3203125,
319
+ "center_y": 299.890625,
320
+ "isVisible": true,
321
+ "isInteractive": true,
322
+ "isValidText": false,
323
+ "isTopElement": true,
324
+ "isInViewport": true,
325
+ "isParentHighlighted": false,
326
+ "xpath": "//*[@id=\"query\"]",
327
+ "selector": "input#query",
328
+ "highlightIndex": 12
329
+ }
330
+ }
331
+ }
332
+ ],
333
+ "Scroll": [
334
+ {
335
+ "url": "https://arxiv.org/search/",
336
+ "actions": [
337
+ {
338
+ "type": "Scroll",
339
+ "locate": null,
340
+ "param": {
341
+ "direction": "down",
342
+ "scrollType": "untilBottom",
343
+ "distance": null
344
+ }
345
+ },
346
+ {
347
+ "type": "Scroll",
348
+ "locate": null,
349
+ "param": {
350
+ "direction": "up",
351
+ "scrollType": "untilTop",
352
+ "distance": null
353
+ }
354
+ }
355
+ ],
356
+ "id_map": {}
357
+ },
358
+ {
359
+ "url": "https://arxiv.org/list/astro-ph/new",
360
+ "actions": [
361
+ {
362
+ "type": "Scroll",
363
+ "locate": null,
364
+ "param": {
365
+ "direction": "down",
366
+ "scrollType": "once"
367
+ }
368
+ },
369
+ {
370
+ "type": "Scroll",
371
+ "locate": null,
372
+ "param": {
373
+ "direction": "down",
374
+ "scrollType": "once",
375
+ "distance": 400
376
+ }
377
+ },
378
+ {
379
+ "type": "Scroll",
380
+ "locate": null,
381
+ "param": {
382
+ "direction": "up",
383
+ "scrollType": "once",
384
+ "distance": 200
385
+ }
386
+ }
387
+ ],
388
+ "id_map": {}
389
+ }
390
+ ],
391
+ "SelectDropdown": [
392
+ {
393
+ "url": "https://demo.chat-sdk.dev/",
394
+ "actions": [
395
+ {
396
+ "type": "Tap",
397
+ "locate": {"id": "3"}
398
+ },
399
+ {
400
+ "type": "SelectDropdown",
401
+ "locate": {"dropdown_id": "3", "option_id": "5"},
402
+ "param": {
403
+ "selection_path": "Reasoning model"
404
+ }
405
+ }
406
+ ],
407
+ "id_map": {
408
+ "3": {
409
+ "node": "ref: <Node>",
410
+ "tagName": "button",
411
+ "className": "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 px-4 py-2 w-fit data-[state=open]:bg-accent data-[state=open]:text-accent-foreground order-1 md:order-2 md:px-2 md:h-[34px]",
412
+ "type": "button",
413
+ "placeholder": null,
414
+ "innerText": "Chat model",
415
+ "attributes": [
416
+ {
417
+ "name": "class",
418
+ "value": "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 px-4 py-2 w-fit data-[state=open]:bg-accent data-[state=open]:text-accent-foreground order-1 md:order-2 md:px-2 md:h-[34px]"
419
+ },
420
+ {
421
+ "name": "data-testid",
422
+ "value": "model-selector"
423
+ },
424
+ {
425
+ "name": "type",
426
+ "value": "button"
427
+ },
428
+ {
429
+ "name": "id",
430
+ "value": "radix-«R6l7lelb»"
431
+ },
432
+ {
433
+ "name": "aria-haspopup",
434
+ "value": "menu"
435
+ },
436
+ {
437
+ "name": "aria-expanded",
438
+ "value": "false"
439
+ },
440
+ {
441
+ "name": "data-state",
442
+ "value": "closed"
443
+ }
444
+ ],
445
+ "viewport": {
446
+ "x": 92,
447
+ "y": 6,
448
+ "width": 117.328125,
449
+ "height": 34
450
+ },
451
+ "center_x": 150.6640625,
452
+ "center_y": 23,
453
+ "isVisible": true,
454
+ "isInteractive": true,
455
+ "isValidText": false,
456
+ "isTopElement": true,
457
+ "isInViewport": true,
458
+ "isParentHighlighted": false,
459
+ "xpath": "//*[@id=\"radix-«R6l7lelb»\"]",
460
+ "selector": "button#radix-«R6l7lelb»",
461
+ "highlightIndex": 3
462
+ },
463
+ "5": {
464
+ "node": "ref: <Node>",
465
+ "tagName": "button",
466
+ "className": "relative flex cursor-default select-none items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 gap-4 group/item flex flex-row justify-between items-center w-full",
467
+ "type": "button",
468
+ "innerText": "Reasoning model\nUses advanced reasoning",
469
+ "attributes": [{
470
+ "name": "type",
471
+ "value": "button"
472
+ }, {
473
+ "name": "class",
474
+ "value": "relative flex cursor-default select-none items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 gap-4 group/item flex flex-row justify-between items-center w-full"
475
+ }, {
476
+ "name": "role",
477
+ "value": "menuitem"
478
+ }, {
479
+ "name": "data-testid",
480
+ "value": "model-selector-item-chat-model-reasoning"
481
+ }, {
482
+ "name": "data-active",
483
+ "value": "false"
484
+ }, {
485
+ "name": "tabindex",
486
+ "value": "-1"
487
+ }, {
488
+ "name": "data-orientation",
489
+ "value": "vertical"
490
+ }, {
491
+ "name": "data-radix-collection-item",
492
+ "value": ""
493
+ }],
494
+ "viewport": {
495
+ "x": 97,
496
+ "y": 101,
497
+ "width": 290,
498
+ "height": 52
499
+ },
500
+ "center_x": 242,
501
+ "center_y": 127,
502
+ "xpath": "/html[1]/body[1]/div[2]/div[1]/button[2]",
503
+ "selector": "button.relative.flex.cursor-default.select-none.items-center.gap-2.rounded-sm.px-2.py-1.5.text-sm.outline-none.transition-colors.focus:bg-accent.focus:text-accent-foreground.data-[disabled]:pointer-events-none.data-[disabled]:opacity-50.[&_svg]:pointer-events-none.[&_svg]:size-4.[&_svg]:shrink-0.gap-4.group/item.flex-row.justify-between.w-full",
504
+ "highlightIndex": 3
505
+ }
506
+ }
507
+ },
508
+ {
509
+ "name": "local dropdowns page (native, ant-select, ant-cascader)",
510
+ "url": "__LOCAL_DROPDOWN_PAGE__",
511
+ "id_map": {
512
+ "13": { "center_x": 210, "center_y": 116 },
513
+ "101": { "center_x": 210, "center_y": 217 },
514
+ "301": { "center_x": 210, "center_y": 317 }
515
+ },
516
+ "actions": [
517
+ { "type": "SelectDropdown", "locate": { "dropdown_id": "13" }, "param": { "selection_path": "Title" } },
518
+ { "type": "SelectDropdown", "locate": { "dropdown_id": "101" }, "param": { "selection_path": "Reasoning model" } },
519
+ { "type": "SelectDropdown", "locate": { "dropdown_id": "301" }, "param": { "selection_path": ["Asia", "China", "Beijing"] } }
520
+ ]
521
+ },
522
+ {
523
+ "url": "https://arxiv.org/search/",
524
+ "actions": [
525
+ {
526
+ "type": "SelectDropdown",
527
+ "locate": {"dropdown_id": "13"},
528
+ "param": {
529
+ "selection_path": "Title"
530
+ }
531
+ }
532
+ ],
533
+ "id_map": {
534
+ "13": {
535
+ "node": "ref: <Node>",
536
+ "tagName": "select",
537
+ "className": "is-medium",
538
+ "type": null,
539
+ "placeholder": null,
540
+ "innerText": "All fields\nTitle\nAuthor(s)\nAbstract\nComments\nJournal reference\nACM classification\nMSC classification\nReport number\narXiv identifier\nDOI\nORCID\nLicense (URI)\narXiv author ID\nHelp pages\nFull text",
541
+ "attributes": [
542
+ {
543
+ "name": "class",
544
+ "value": "is-medium"
545
+ },
546
+ {
547
+ "name": "id",
548
+ "value": "searchtype"
549
+ },
550
+ {
551
+ "name": "name",
552
+ "value": "searchtype"
553
+ }
554
+ ],
555
+ "viewport": {
556
+ "x": 902.140625,
557
+ "y": 280.203125,
558
+ "width": 202.6875,
559
+ "height": 39.375
560
+ },
561
+ "center_x": 1003.484375,
562
+ "center_y": 299.890625,
563
+ "isVisible": true,
564
+ "isInteractive": true,
565
+ "isValidText": false,
566
+ "isTopElement": true,
567
+ "isInViewport": true,
568
+ "isParentHighlighted": false,
569
+ "xpath": "//*[@id=\"searchtype\"]",
570
+ "selector": "select#searchtype",
571
+ "highlightIndex": 13
572
+ }
573
+ }
574
+ }
575
+ ]
576
+ }
tests/mocks/actions_negative_mocks.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SelectDropdown_Negative": [
3
+ {
4
+ "name": "native select: option not found",
5
+ "url": "__LOCAL_DROPDOWN_PAGE__",
6
+ "id_map": { "13": { "center_x": 210, "center_y": 116 } },
7
+ "actions": [
8
+ {
9
+ "type": "SelectDropdown",
10
+ "locate": { "dropdown_id": "13" },
11
+ "param": { "selection_path": "Non-Existing-Option" }
12
+ }
13
+ ]
14
+ },
15
+ {
16
+ "name": "ant-select: option not found",
17
+ "url": "__LOCAL_DROPDOWN_PAGE__",
18
+ "id_map": { "101": { "center_x": 210, "center_y": 217 } },
19
+ "actions": [
20
+ {
21
+ "type": "SelectDropdown",
22
+ "locate": { "dropdown_id": "101" },
23
+ "param": { "selection_path": "Nonexistent Model" }
24
+ }
25
+ ]
26
+ },
27
+ {
28
+ "name": "ant-select: option_id missing in buffer, fallback still not found",
29
+ "url": "__LOCAL_DROPDOWN_PAGE__",
30
+ "id_map": { "101": { "center_x": 210, "center_y": 217 } },
31
+ "actions": [
32
+ {
33
+ "type": "SelectDropdown",
34
+ "locate": { "dropdown_id": "101", "option_id": "9999" },
35
+ "param": { "selection_path": "Nonexistent Model" }
36
+ }
37
+ ]
38
+ },
39
+ {
40
+ "name": "cascader: level 1 option not found",
41
+ "url": "__LOCAL_DROPDOWN_PAGE__",
42
+ "id_map": { "301": { "center_x": 210, "center_y": 317 } },
43
+ "actions": [
44
+ {
45
+ "type": "SelectDropdown",
46
+ "locate": { "dropdown_id": "301" },
47
+ "param": { "selection_path": ["Asia", "Korea"] }
48
+ }
49
+ ]
50
+ },
51
+ {
52
+ "name": "cascader: exceeds maximum levels",
53
+ "url": "__LOCAL_DROPDOWN_PAGE__",
54
+ "id_map": { "301": { "center_x": 210, "center_y": 317 } },
55
+ "actions": [
56
+ {
57
+ "type": "SelectDropdown",
58
+ "locate": { "dropdown_id": "301" },
59
+ "param": { "selection_path": ["Asia", "China", "Beijing", "Street-Not-Exists"] }
60
+ }
61
+ ]
62
+ }
63
+ ],
64
+ "Scroll_Negative": [
65
+ {
66
+ "url": "https://arxiv.org/list/astro-ph/new",
67
+ "actions": [
68
+ {
69
+ "type": "Scroll",
70
+ "locate": null,
71
+ "param": {
72
+ "direction": "right",
73
+ "scrollType": "once"
74
+ }
75
+ },
76
+ {
77
+ "type": "Scroll",
78
+ "locate": null,
79
+ "param": {
80
+ "direction": "down",
81
+ "scrollType": "untilRight",
82
+ "distance": null
83
+ }
84
+ }
85
+ ],
86
+ "id_map": {}
87
+ }
88
+ ]
89
+ }
tests/test_action_executor.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ # Add project root to Python path
5
+ sys.path.insert(0, str(Path(__file__).parent.parent))
6
+
7
+ import asyncio
8
+ import json
9
+ import os
10
+ import pathlib
11
+ from datetime import datetime
12
+
13
+ import pytest
14
+ from playwright.async_api import async_playwright
15
+
16
+ from webqa_agent.actions.action_executor import ActionExecutor
17
+ from webqa_agent.actions.action_handler import ActionHandler
18
+
19
+ # pytest tests/test_action_executor.py::TestActionExecutor::test_click_action -v -s
20
+ # pytest tests/test_action_executor.py::TestActionExecutor -v -s
21
+
22
+
23
# Directory holding self-contained local HTML fixtures, plus the map from
# URL placeholders (as used inside the mock JSON files) to fixture filenames.
_TESTS_DIR = pathlib.Path(__file__).parent
LOCAL_PAGES_DIR = _TESTS_DIR / 'test_pages'
PLACEHOLDER_LOCAL_PAGES = {
    '__LOCAL_DROPDOWN_PAGE__': 'dropdown_components.html',
}

# JSON fixtures describing the positive and negative action test cases.
MOCKS_PATH = _TESTS_DIR / 'mocks' / 'action_mocks.json'
MOCKS_PATH_NEGATIVE = _TESTS_DIR / 'mocks' / 'actions_negative_mocks.json'
31
+
32
+
33
class TestActionExecutor:
    """End-to-end tests for ActionExecutor driven by JSON mock fixtures.

    Each test loads a case list from ``action_mocks.json`` (or the negative
    variant), navigates a real Chromium page to the case URL, seeds the
    ActionHandler's element buffer with the pre-captured ``id_map``, executes
    the listed actions, and captures before/after screenshots.

    NOTE(review): ``setup_method``/``teardown_method`` are ``async``, so
    pytest will not invoke them automatically; every test awaits them
    explicitly inside a try/finally.
    """

    # Results directories for action tests (screenshots nested under results)
    results_dir = pathlib.Path(__file__).parent / 'actions_test_results'
    screenshots_dir = results_dir / 'screenshots'

    # Global navigation settings used by navigate()
    GOTO_WAIT_UNTIL = 'networkidle'
    GOTO_TIMEOUT_MS = 30000

    async def setup_method(self):
        """Setup method called before each test.

        Launches a headed Chromium page (1280x720 viewport) and wires up
        the ActionHandler/ActionExecutor pair against it.
        """
        # Ensure directories exist
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.screenshots_dir.mkdir(parents=True, exist_ok=True)
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=False)
        self.context = await self.browser.new_context(
            viewport={'width': 1280, 'height': 720},
        )
        self.page = await self.context.new_page()

        # Initialize action handler and executor
        self.action_handler = ActionHandler()
        await self.action_handler.initialize(self.page)
        self.action_executor = ActionExecutor(self.action_handler)
        await self.action_executor.initialize()

    async def teardown_method(self):
        """Teardown method called after each test.

        NOTE(review): assumes setup_method ran far enough to set these
        attributes; a failure before that would raise AttributeError here.
        """
        if self.context:
            await self.context.close()
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()

    def resolve_url(self, url: str) -> str:
        """Map placeholder URLs from the mock files to local fixture URIs.

        Known placeholders resolve via PLACEHOLDER_LOCAL_PAGES;
        ``__LOCAL_PAGE__:<relpath>`` resolves a relative path under
        LOCAL_PAGES_DIR (absolute paths and ``..`` traversal are rejected);
        anything else is returned unchanged as a real URL.
        """
        if url in PLACEHOLDER_LOCAL_PAGES:
            target = LOCAL_PAGES_DIR / PLACEHOLDER_LOCAL_PAGES[url]
            return target.resolve().as_uri()

        generic_prefix = '__LOCAL_PAGE__:'
        if url.startswith(generic_prefix):
            rel = url[len(generic_prefix) :].strip()
            rel_path = pathlib.Path(rel)
            if rel_path.is_absolute() or '..' in rel_path.parts:
                raise ValueError(f'Invalid local page path: {rel}')
            target = LOCAL_PAGES_DIR / rel_path
            return target.resolve().as_uri()

        return url

    async def navigate(self, url: str) -> None:
        """Navigate to a resolved URL using global navigation settings."""
        await self.page.goto(
            self.resolve_url(url),
            wait_until=self.GOTO_WAIT_UNTIL,
            timeout=self.GOTO_TIMEOUT_MS,
        )

    def get_timestamp(self) -> str:
        """Return a filesystem-safe timestamp for screenshot filenames."""
        return datetime.now().strftime('%Y%m%d_%H%M%S')

    async def take_before_screenshot(self, url: str, param_name: str) -> str:
        """Take screenshot before action; returns the saved file path."""
        timestamp = self.get_timestamp()
        # Make the URL safe for use inside a filename
        safe_url = url.replace('://', '_').replace('/', '_')
        screenshot_path = self.screenshots_dir / f'{param_name}_{safe_url}_before_{timestamp}.png'
        await self.page.screenshot(path=str(screenshot_path), full_page=False)
        return str(screenshot_path)

    async def take_after_screenshot(self, url: str, param_name: str) -> str:
        """Take screenshot after action; returns the saved file path."""
        timestamp = self.get_timestamp()
        safe_url = url.replace('://', '_').replace('/', '_')
        screenshot_path = self.screenshots_dir / f'{param_name}_{safe_url}_after_{timestamp}.png'
        await self.page.screenshot(path=str(screenshot_path), full_page=False)
        return str(screenshot_path)

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Test click action."""
        await self.setup_method()
        try:
            # Load mocks and iterate
            with open(MOCKS_PATH, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            tap_cases = mocks.get('Tap', [])
            assert len(tap_cases) > 0
            for i, case in enumerate(tap_cases):
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], 'click')

                # Execute click action
                for action in case['actions']:
                    result = await self.action_executor.execute(action)
                    await asyncio.sleep(2)
                    elementid = action['locate']['id']
                    after_path = await self.take_after_screenshot(case['url'], f'click_{elementid}')

                # Verify results
                # NOTE(review): these asserts only see the final action's
                # result/screenshot for each case — confirm that is intended.
                assert result['success'] is True
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_hover_action(self):
        """Test hover action."""
        await self.setup_method()
        try:
            with open(MOCKS_PATH, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            hover_cases = mocks.get('Hover', [])
            assert len(hover_cases) > 0
            for i, case in enumerate(hover_cases):
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], 'hover')

                for action in case['actions']:
                    result = await self.action_executor.execute(action)
                    await asyncio.sleep(2)
                    elementid = action['locate']['id']
                    after_path = await self.take_after_screenshot(case['url'], f'hover_{elementid}')

                # Verify results (last action of the case)
                assert result['success'] is True
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_input_action(self):
        """Test input action."""
        await self.setup_method()
        try:
            with open(MOCKS_PATH, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            input_cases = mocks.get('Input', [])
            assert len(input_cases) > 0
            for i, case in enumerate(input_cases):
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], 'input')

                for action in case['actions']:
                    result = await self.action_executor.execute(action)
                    await asyncio.sleep(2)
                    elementid = action['locate']['id']
                    after_path = await self.take_after_screenshot(case['url'], f'input_{elementid}')

                # Verify results (last action of the case)
                assert result['success'] is True
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Test scroll action."""
        await self.setup_method()
        try:
            with open(MOCKS_PATH, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            scroll_cases = mocks.get('Scroll', [])
            assert len(scroll_cases) > 0
            for i, case in enumerate(scroll_cases):
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], f'scroll_{i}')

                for j, action in enumerate(case['actions']):
                    result = await self.action_executor.execute(action)
                    await asyncio.sleep(2)
                    after_path = await self.take_after_screenshot(case['url'], f'scroll_{i}_{j}')

                # Verify results (last action of the case)
                assert result['success'] is True
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_select_dropdown_action(self):
        """Test select dropdown action."""
        await self.setup_method()
        try:
            with open(MOCKS_PATH, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            select_dropdown_cases = mocks.get('SelectDropdown', [])
            assert len(select_dropdown_cases) > 0
            for i, case in enumerate(select_dropdown_cases):
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], f'select_dropdown_{i}')

                for j, action in enumerate(case['actions']):
                    result = await self.action_executor.execute(action)
                    print(f"[SelectDropdown][{case.get('name','case')}]: {result.get('message','')}\n")
                    await asyncio.sleep(5)
                    after_path = await self.take_after_screenshot(case['url'], f'select_dropdown_{j}')

                # Verify results (last action of the case)
                assert result['success'] is True
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_select_dropdown_action_negative(self):
        """Negative tests for select dropdown action: expect success == False and print message"""
        await self.setup_method()
        try:
            with open(MOCKS_PATH_NEGATIVE, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            neg_cases = mocks.get('SelectDropdown_Negative', [])
            assert len(neg_cases) > 0

            for i, case in enumerate(neg_cases):
                # about:blank is fine without networkidle wait
                await self.navigate(case['url'])
                self.action_handler.set_page_element_buffer(case.get('id_map', {}))

                for action in case['actions']:
                    result = await self.action_executor.execute(action)
                    print(f"[SelectDropdown_Negative][{case.get('name','case')}]: {result.get('message','')}\n")
                    # Every negative action is expected to fail
                    assert result.get('success') is False

        finally:
            await self.teardown_method()

    @pytest.mark.asyncio
    async def test_scroll_action_negative(self):
        """Negative tests for scroll action: expect success == False and print message"""
        await self.setup_method()
        try:
            with open(MOCKS_PATH_NEGATIVE, 'r', encoding='utf-8') as f:
                mocks = json.load(f)
            scroll_neg_cases = mocks.get('Scroll_Negative', [])
            assert len(scroll_neg_cases) > 0

            for i, case in enumerate(scroll_neg_cases):
                # NOTE(review): inline goto duplicates self.navigate() —
                # consider using the shared helper for consistency.
                await self.page.goto(self.resolve_url(case['url']), wait_until='networkidle', timeout=30000)
                self.action_handler.set_page_element_buffer(case['id_map'])
                before_path = await self.take_before_screenshot(case['url'], f'scroll_{i}')

                for j, action in enumerate(case['actions']):
                    result = await self.action_executor.execute(action)
                    print(f"[Scroll_Negative][{case.get('name','case')}]: {result.get('message','')}\n")
                    await asyncio.sleep(2)
                    after_path = await self.take_after_screenshot(case['url'], f'scroll_{i}_{j}')

                # Verify results (last action of the case)
                assert result['success'] is False
                assert os.path.exists(before_path)
                assert os.path.exists(after_path)

        finally:
            await self.teardown_method()
tests/test_crawler.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List
7
+
8
+ # Add project root to Python path
9
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
10
+
11
+ import pytest
12
+ from playwright.async_api import async_playwright
13
+
14
+ from webqa_agent.crawler.crawl import CrawlHandler
15
+ from webqa_agent.crawler.deep_crawler import DeepCrawler
16
+
17
+ # pytest tests/test_crawler.py::TestCrawler::test_highlight_crawl -v -s --url https://google.com
18
+ # pytest tests/test_crawler.py -v -s --url https://google.com
19
+
20
+
21
+ class TestCrawler:
22
+ """Test suite for web crawling functionality with different parameters."""
23
+
24
+ # Default test URLs (can be overridden)
25
+ DEFAULT_TEST_URLS = 'https://google.com'
26
+
27
+ # Different crawl parameter combinations to test
28
+ CRAWL_PARAMS = [
29
+ {'name': 'highlight_crawl', 'highlight': True, 'highlight_text': False, 'viewport_only': True},
30
+ {'name': 'text_highlight_crawl', 'highlight': True, 'highlight_text': True, 'viewport_only': True},
31
+ {'name': 'viewport_highlight_crawl', 'highlight': True, 'highlight_text': False, 'viewport_only': True},
32
+ ]
33
+
34
+ # Directories (class attributes; accessible via self)
35
+ test_results_dir = Path(__file__).parent / 'crawler_test_results'
36
+ screenshots_dir = test_results_dir / 'screenshots'
37
+ id_maps_dir = test_results_dir / 'id_maps'
38
+ crawl_data_dir = test_results_dir / 'crawl_data'
39
+ clean_id_maps_dir = test_results_dir / 'clean_id_maps'
40
+
41
+ async def setup_method(self):
42
+ """Setup method called before each test."""
43
+ # Ensure directories exist
44
+ self.test_results_dir.mkdir(parents=True, exist_ok=True)
45
+ self.screenshots_dir.mkdir(parents=True, exist_ok=True)
46
+ self.id_maps_dir.mkdir(parents=True, exist_ok=True)
47
+ self.crawl_data_dir.mkdir(parents=True, exist_ok=True)
48
+ self.clean_id_maps_dir.mkdir(parents=True, exist_ok=True)
49
+ self.playwright = await async_playwright().start()
50
+ self.browser = await self.playwright.chromium.launch(
51
+ headless=False,
52
+ args=[
53
+ '--no-sandbox',
54
+ '--disable-setuid-sandbox',
55
+ '--disable-gpu',
56
+ '--force-device-scale-factor=1',
57
+ ],
58
+ )
59
+ self.context = await self.browser.new_context(
60
+ viewport={'width': 1280, 'height': 720},
61
+ )
62
+ self.page = await self.context.new_page()
63
+
64
+ # Set default timeout
65
+ self.page.set_default_navigation_timeout(30000)
66
+ self.page.set_default_timeout(30000)
67
+
68
+ async def teardown_method(self):
69
+ """Teardown method called after each test."""
70
+ if self.context:
71
+ await self.context.close()
72
+ if self.browser:
73
+ await self.browser.close()
74
+ if self.playwright:
75
+ await self.playwright.stop()
76
+
77
+ def get_timestamp(self) -> str:
78
+ """Get timestamp for file naming."""
79
+ return datetime.now().strftime('%Y%m%d_%H%M%S')
80
+
81
+ async def take_before_screenshot(self, url: str, param_name: str) -> str:
82
+ """Take screenshot before crawling."""
83
+ timestamp = self.get_timestamp()
84
+ safe_url = url.replace('://', '_').replace('/', '_')
85
+ screenshot_path = self.screenshots_dir / f'{param_name}_{safe_url}_before_{timestamp}.png'
86
+ await self.page.screenshot(path=str(screenshot_path), full_page=True)
87
+ return str(screenshot_path)
88
+
89
+ async def take_after_screenshot(self, url: str, param_name: str) -> str:
90
+ """Take screenshot after crawling (with possible highlights)."""
91
+ timestamp = self.get_timestamp()
92
+ screenshot_path = (
93
+ self.screenshots_dir / f"{param_name}_{url.replace('://', '_').replace('/', '_')}_after_{timestamp}.png"
94
+ )
95
+ await self.page.screenshot(path=str(screenshot_path), full_page=True)
96
+ return str(screenshot_path)
97
+
98
+ def save_id_map(self, url: str, param_name: str, id_map: Dict[str, Any]) -> str:
99
+ """Save ID map to JSON file."""
100
+ timestamp = self.get_timestamp()
101
+ id_map_path = (
102
+ self.id_maps_dir / f"{param_name}_{url.replace('://', '_').replace('/', '_')}_id_map_{timestamp}.json"
103
+ )
104
+
105
+ with open(id_map_path, 'w', encoding='utf-8') as f:
106
+ json.dump(id_map, f, ensure_ascii=False, indent=2)
107
+
108
+ return str(id_map_path)
109
+
110
+ def save_clean_id_map(self, url: str, param_name: str, clean_id_map: Dict[str, Any]) -> str:
111
+ """Save clean ID map to JSON file."""
112
+ timestamp = self.get_timestamp()
113
+ clean_id_map_path = (
114
+ self.clean_id_maps_dir / f"{param_name}_{url.replace('://', '_').replace('/', '_')}_clean_id_map_{timestamp}.json"
115
+ )
116
+
117
+ with open(clean_id_map_path, 'w', encoding='utf-8') as f:
118
+ json.dump(clean_id_map, f, ensure_ascii=False, indent=2)
119
+
120
+ return str(clean_id_map_path)
121
+
122
+ def save_crawl_data(self, url: str, param_name: str, crawl_data: Dict[str, Any]) -> str:
123
+ """Save crawl data to JSON file."""
124
+ timestamp = self.get_timestamp()
125
+ crawl_data_path = (
126
+ self.crawl_data_dir
127
+ / f"{param_name}_{url.replace('://', '_').replace('/', '_')}_crawl_data_{timestamp}.json"
128
+ )
129
+
130
+ with open(crawl_data_path, 'w', encoding='utf-8') as f:
131
+ json.dump(crawl_data, f, ensure_ascii=False, indent=2)
132
+
133
+ return str(crawl_data_path)
134
+
135
+ def save_test_summary(self, test_results: List[Dict[str, Any]]) -> str:
136
+ """Save test summary to JSON file."""
137
+ timestamp = self.get_timestamp()
138
+ summary_path = self.test_results_dir / f'test_summary_{timestamp}.json'
139
+
140
+ with open(summary_path, 'w', encoding='utf-8') as f:
141
+ json.dump(test_results, f, ensure_ascii=False, indent=2)
142
+
143
+ return str(summary_path)
144
+
145
+ async def crawl_single_url(self, url: str, params: Dict[str, Any]) -> Dict[str, Any]:
146
+ """Crawl a single URL with specified parameters using the current
147
+ page/context."""
148
+ await self.page.goto(url, wait_until='networkidle')
149
+
150
+ # Take before screenshot
151
+ before_screenshot = await self.take_before_screenshot(url, params['name'])
152
+
153
+ # Initialize crawler and perform crawling
154
+ crawler = DeepCrawler(self.page)
155
+ crawl_result = await crawler.crawl(
156
+ page=self.page,
157
+ highlight=params['highlight'],
158
+ highlight_text=params['highlight_text'],
159
+ viewport_only=params['viewport_only'],
160
+ )
161
+ crawl_data = crawl_result.element_tree
162
+ id_map = crawl_result.raw_dict()
163
+ clean_id_map = crawl_result.clean_dict()
164
+
165
+ # Take after screenshot
166
+ after_screenshot = await self.take_after_screenshot(url, params['name'])
167
+
168
+ # Save results
169
+ id_map_path = self.save_id_map(url, params['name'], id_map)
170
+ clean_id_map_path = self.save_clean_id_map(url, params['name'], clean_id_map)
171
+ crawl_data_path = self.save_crawl_data(url, params['name'], crawl_data)
172
+
173
+ # Remove markers if highlights were added
174
+ if params['highlight']:
175
+ await crawler.remove_marker(self.page)
176
+
177
+ return {
178
+ 'url': url,
179
+ 'parameters': params,
180
+ 'results': {
181
+ 'before_screenshot': before_screenshot,
182
+ 'after_screenshot': after_screenshot,
183
+ 'id_map_path': id_map_path,
184
+ 'clean_id_map_path': clean_id_map_path,
185
+ 'crawl_data_path': crawl_data_path,
186
+ 'success': True,
187
+ },
188
+ }
189
+
190
+ @pytest.mark.asyncio
191
+ async def test_crawl_link(self, request):
192
+ """Test integration with CrawlHandler for link extraction."""
193
+ await self.setup_method()
194
+
195
+ try:
196
+ # Resolve URL from CLI/env or default
197
+ test_url = request.config.getoption('--url') or self.DEFAULT_TEST_URLS
198
+
199
+ # Navigate to the test URL
200
+ await self.page.goto(test_url, wait_until='networkidle')
201
+
202
+ # Take before screenshot
203
+ before_screenshot = await self.take_before_screenshot(test_url, 'crawl_handler')
204
+
205
+ # Initialize crawl handler
206
+ crawl_handler = CrawlHandler(test_url)
207
+
208
+ # Extract links
209
+ links = await crawl_handler.extract_links(self.page)
210
+ print(f'🔗 Found {len(links)} links')
211
+
212
+ # Get clickable elements using crawl handler
213
+ clickable_elements = await crawl_handler.clickable_elements_detection(self.page)
214
+ print(f'🖱️ Found {len(clickable_elements)} clickable elements')
215
+
216
+ # Take after screenshot
217
+ after_screenshot = await self.take_after_screenshot(test_url, 'crawl_handler')
218
+
219
+ # Save results
220
+ results = {
221
+ 'url': test_url,
222
+ 'links': links,
223
+ 'clickable_elements': clickable_elements,
224
+ 'links_count': len(links),
225
+ 'clickable_elements_count': len(clickable_elements),
226
+ }
227
+
228
+ results_path = self.save_crawl_data(test_url, 'crawl_handler', results)
229
+
230
+ # Assertions
231
+ assert isinstance(links, list)
232
+ assert isinstance(clickable_elements, list)
233
+ assert os.path.exists(before_screenshot)
234
+ assert os.path.exists(after_screenshot)
235
+ assert os.path.exists(results_path)
236
+
237
+ print('CrawlHandler integration test passed')
238
+
239
+ finally:
240
+ await self.teardown_method()
241
+
242
+ @pytest.mark.asyncio
243
+ async def test_highlight_crawl(self, request):
244
+ """Test highlighted crawl parameters."""
245
+ await self.setup_method()
246
+
247
+ try:
248
+ test_url = request.config.getoption('--url') or self.DEFAULT_TEST_URLS
249
+
250
+ params = self.CRAWL_PARAMS[0] # highlight_crawl
251
+ result = await self.crawl_single_url(test_url, params)
252
+
253
+ assert result['results']['success']
254
+ assert os.path.exists(result['results']['before_screenshot'])
255
+ assert os.path.exists(result['results']['after_screenshot'])
256
+ assert os.path.exists(result['results']['id_map_path'])
257
+ assert os.path.exists(result['results']['crawl_data_path'])
258
+ finally:
259
+ await self.teardown_method()
260
+
261
+ @pytest.mark.asyncio
262
+ async def test_text_highlight_crawl(self, request):
263
+ """Test text highlight crawl parameters."""
264
+ await self.setup_method()
265
+
266
+ try:
267
+ test_url = request.config.getoption('--url') or self.DEFAULT_TEST_URLS
268
+
269
+ params = self.CRAWL_PARAMS[1] # text_highlight_crawl
270
+ result = await self.crawl_single_url(test_url, params)
271
+
272
+ assert result['results']['success']
273
+ assert os.path.exists(result['results']['before_screenshot'])
274
+ assert os.path.exists(result['results']['after_screenshot'])
275
+ assert os.path.exists(result['results']['id_map_path'])
276
+ assert os.path.exists(result['results']['crawl_data_path'])
277
+
278
+ finally:
279
+ await self.teardown_method()
280
+
281
+ @pytest.mark.asyncio
282
+ async def test_viewport_highlight_crawl(self, request):
283
+ """Test viewport highlight crawl parameters."""
284
+ await self.setup_method()
285
+
286
+ try:
287
+ test_url = request.config.getoption('--url') or self.DEFAULT_TEST_URLS
288
+
289
+ params = self.CRAWL_PARAMS[2] # viewport_highlight_crawl
290
+ result = await self.crawl_single_url(test_url, params)
291
+
292
+ assert result['results']['success']
293
+ assert os.path.exists(result['results']['before_screenshot'])
294
+ assert os.path.exists(result['results']['after_screenshot'])
295
+ assert os.path.exists(result['results']['id_map_path'])
296
+ assert os.path.exists(result['results']['crawl_data_path'])
297
+
298
+ finally:
299
+ await self.teardown_method()
tests/test_loading_animation.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ from webqa_agent.utils.loading_animation import LoadingAnimation
4
+
5
+ with LoadingAnimation("Testing..."):
6
+ print("This is a test message.")
7
+ time.sleep(5)
tests/test_pages/dropdown_components.html ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Dropdown Components Test Page</title>
7
+ <style>
8
+ body { font-family: sans-serif; }
9
+ .section { position: relative; margin: 0; padding: 0; }
10
+
11
+ /* Absolute positions to make center_x/center_y predictable */
12
+ #native-container { position: absolute; left: 100px; top: 100px; width: 220px; height: 32px; }
13
+ #ant-select-container { position: absolute; left: 100px; top: 200px; width: 220px; height: 34px; }
14
+ #ant-cascader-container { position: absolute; left: 100px; top: 300px; width: 220px; height: 34px; }
15
+
16
+ /* Mimic Ant Design Select */
17
+ .ant-select { position: relative; display: inline-block; width: 100%; }
18
+ .ant-select-selector { box-sizing: border-box; width: 100%; height: 34px; border: 1px solid #999; border-radius: 4px; padding: 6px 8px; background: #fff; cursor: pointer; }
19
+ .ant-select-dropdown { position: absolute; left: 0; top: 36px; width: 100%; border: 1px solid #999; border-radius: 4px; background: #fff; box-shadow: 0 2px 8px rgba(0,0,0,0.15); z-index: 1000; }
20
+ .ant-select-dropdown-hidden { display: none; }
21
+ .ant-select-item-option { padding: 6px 8px; cursor: pointer; }
22
+ .ant-select-item-option:hover { background: #f5f5f5; }
23
+ .ant-select-item-option-content { display: inline-block; }
24
+
25
+ /* Mimic Ant Design Cascader */
26
+ .ant-cascader { position: relative; display: inline-block; width: 100%; }
27
+ .ant-cascader-dropdown { position: absolute; left: 0; top: 36px; display: flex; border: 1px solid #999; border-radius: 4px; background: #fff; box-shadow: 0 2px 8px rgba(0,0,0,0.15); z-index: 1000; }
28
+ .ant-cascader-dropdown-hidden { display: none; }
29
+ .ant-cascader-menu { min-width: 160px; max-height: 220px; overflow: auto; border-right: 1px solid #eee; }
30
+ .ant-cascader-menu:last-child { border-right: none; }
31
+ .ant-cascader-menu-item { padding: 6px 8px; cursor: pointer; white-space: nowrap; }
32
+ .ant-cascader-menu-item:hover { background: #f5f5f5; }
33
+ .ant-cascader-menu-item-expand::after { content: ' ▶'; color: #999; }
34
+ </style>
35
+ </head>
36
+ <body>
37
+ <h2 style="margin:16px 16px 64px;">Dropdown Components Test Page</h2>
38
+
39
+ <!-- Native select (dropdown_id suggestion: 13) -->
40
+ <div id="native-container" class="section">
41
+ <label for="native-select" style="display:none;">Native</label>
42
+ <select id="native-select" class="is-medium" style="width:100%; height:100%;">
43
+ <option value="all">All fields</option>
44
+ <option value="title">Title</option>
45
+ <option value="authors">Author(s)</option>
46
+ </select>
47
+ </div>
48
+
49
+ <!-- Ant Design Select mimic (dropdown_id suggestion: 101) -->
50
+ <div id="ant-select-container" class="section">
51
+ <div class="ant-select" id="ant-select">
52
+ <div class="ant-select-selector" aria-expanded="false">Select an option</div>
53
+ <div class="ant-select-dropdown ant-select-dropdown-hidden" id="ant-select-dropdown">
54
+ <div class="ant-select-item-option" data-value="chat-model">
55
+ <span class="ant-select-item-option-content">Chat model</span>
56
+ </div>
57
+ <div class="ant-select-item-option" data-value="reasoning-model">
58
+ <span class="ant-select-item-option-content">Reasoning model</span>
59
+ </div>
60
+ <div class="ant-select-item-option" data-value="fast-model">
61
+ <span class="ant-select-item-option-content">Fast model</span>
62
+ </div>
63
+ </div>
64
+ </div>
65
+ </div>
66
+
67
+ <!-- Ant Design Cascader mimic (dropdown_id suggestion: 301) -->
68
+ <div id="ant-cascader-container" class="section">
69
+ <div class="ant-cascader" id="ant-cascader">
70
+ <div class="ant-select-selector" aria-expanded="false">Select location</div>
71
+ <div class="ant-cascader-dropdown ant-cascader-dropdown-hidden" id="ant-cascader-dropdown">
72
+ <div class="ant-cascader-menu" id="cascader-level-0"></div>
73
+ <div class="ant-cascader-menu" id="cascader-level-1"></div>
74
+ <div class="ant-cascader-menu" id="cascader-level-2"></div>
75
+ </div>
76
+ </div>
77
+ </div>
78
+
79
+ <script>
80
+ // Ant Select: toggle dropdown on selector click
81
+ (function() {
82
+ const select = document.getElementById('ant-select');
83
+ if (!select) return;
84
+ const selector = select.querySelector('.ant-select-selector');
85
+ const dropdown = document.getElementById('ant-select-dropdown');
86
+ selector.addEventListener('click', () => {
87
+ const hidden = dropdown.classList.contains('ant-select-dropdown-hidden');
88
+ dropdown.classList.toggle('ant-select-dropdown-hidden', !hidden ? true : false);
89
+ selector.setAttribute('aria-expanded', hidden ? 'true' : 'false');
90
+ });
91
+ dropdown.addEventListener('click', (e) => {
92
+ const item = e.target.closest('.ant-select-item-option');
93
+ if (item) {
94
+ selector.textContent = item.querySelector('.ant-select-item-option-content')?.textContent || item.textContent;
95
+ dropdown.classList.add('ant-select-dropdown-hidden');
96
+ selector.setAttribute('aria-expanded', 'false');
97
+ }
98
+ });
99
+ })();
100
+
101
+ // Ant Cascader: proper cascading behavior (click L0 -> show L1; click L1 -> show L2 or select if leaf)
102
+ (function() {
103
+ const cascader = document.getElementById('ant-cascader');
104
+ if (!cascader) return;
105
+ const selector = cascader.querySelector('.ant-select-selector');
106
+ const dropdown = document.getElementById('ant-cascader-dropdown');
107
+ const menu0 = document.getElementById('cascader-level-0');
108
+ const menu1 = document.getElementById('cascader-level-1');
109
+ const menu2 = document.getElementById('cascader-level-2');
110
+
111
+ // Data model
112
+ const data = {
113
+ 'Asia': {
114
+ 'China': ['Beijing', 'Shanghai'],
115
+ 'Japan': []
116
+ },
117
+ 'Europe': {
118
+ 'Germany': [],
119
+ 'France': []
120
+ }
121
+ };
122
+
123
+ function clearMenu(menuEl) {
124
+ while (menuEl.firstChild) menuEl.removeChild(menuEl.firstChild);
125
+ }
126
+
127
+ function renderMenuItems(menuEl, items, hasChildrenFn) {
128
+ clearMenu(menuEl);
129
+ items.forEach(text => {
130
+ const div = document.createElement('div');
131
+ div.className = 'ant-cascader-menu-item';
132
+ div.textContent = text;
133
+ if (hasChildrenFn && hasChildrenFn(text)) {
134
+ div.classList.add('ant-cascader-menu-item-expand');
135
+ }
136
+ menuEl.appendChild(div);
137
+ });
138
+ }
139
+
140
+ // Initialize level 0 only
141
+ renderMenuItems(menu0, Object.keys(data), (key) => Object.keys(data[key] || {}).length > 0);
142
+ clearMenu(menu1);
143
+ clearMenu(menu2);
144
+
145
+ selector.addEventListener('click', () => {
146
+ const hidden = dropdown.classList.contains('ant-cascader-dropdown-hidden');
147
+ dropdown.classList.toggle('ant-cascader-dropdown-hidden', !hidden ? true : false);
148
+ selector.setAttribute('aria-expanded', hidden ? 'true' : 'false');
149
+ });
150
+
151
+ // Level 0 -> render Level 1
152
+ menu0.addEventListener('click', (e) => {
153
+ const item = e.target.closest('.ant-cascader-menu-item');
154
+ if (!item) return;
155
+ const topKey = item.textContent.trim();
156
+ const childrenObj = data[topKey] || {};
157
+ const level1Items = Object.keys(childrenObj);
158
+ renderMenuItems(menu1, level1Items, (key) => (childrenObj[key] || []).length > 0);
159
+ clearMenu(menu2);
160
+ });
161
+
162
+ // Level 1 -> render Level 2 or select if leaf
163
+ menu1.addEventListener('click', (e) => {
164
+ const item = e.target.closest('.ant-cascader-menu-item');
165
+ if (!item) return;
166
+ const secondKey = item.textContent.trim();
167
+
168
+ // Find parent (topKey) by scanning data
169
+ let topKey = null;
170
+ for (const k of Object.keys(data)) {
171
+ if (Object.keys(data[k] || {}).includes(secondKey)) { topKey = k; break; }
172
+ }
173
+ const level2Items = (topKey && data[topKey] && data[topKey][secondKey]) ? data[topKey][secondKey] : [];
174
+ if (level2Items.length === 0) {
175
+ selector.textContent = secondKey;
176
+ dropdown.classList.add('ant-cascader-dropdown-hidden');
177
+ selector.setAttribute('aria-expanded', 'false');
178
+ return;
179
+ }
180
+ renderMenuItems(menu2, level2Items, () => false);
181
+ });
182
+
183
+ // Level 2 -> select and close
184
+ menu2.addEventListener('click', (e) => {
185
+ const item = e.target.closest('.ant-cascader-menu-item');
186
+ if (!item) return;
187
+ selector.textContent = item.textContent.trim();
188
+ dropdown.classList.add('ant-cascader-dropdown-hidden');
189
+ selector.setAttribute('aria-expanded', 'false');
190
+ });
191
+ })();
192
+ </script>
193
+ </body>
194
+ </html>
webqa-agent.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import asyncio
4
+ import os
5
+ import subprocess
6
+ import sys
7
+ import traceback
8
+
9
+ import yaml
10
+ from playwright.async_api import Error as PlaywrightError
11
+ from playwright.async_api import async_playwright
12
+
13
+ from webqa_agent.executor import ParallelMode
14
+
15
+
16
+ def find_config_file(args_config=None):
17
+ """Intelligently find configuration file."""
18
+ # 1. Command line arguments have highest priority
19
+ if args_config:
20
+ if os.path.isfile(args_config):
21
+ print(f"✅ Using specified config file: {args_config}")
22
+ return args_config
23
+ else:
24
+ raise FileNotFoundError(f"❌ Specified config file not found: {args_config}")
25
+
26
+ # 2. Search default locations by priority
27
+ current_dir = os.getcwd()
28
+ script_dir = os.path.dirname(os.path.abspath(__file__))
29
+
30
+ default_paths = [
31
+ os.path.join(current_dir, "config", "config.yaml"), # config in current directory
32
+ os.path.join(script_dir, "config", "config.yaml"), # config in script directory
33
+ os.path.join(current_dir, "config.yaml"), # compatible location in current directory
34
+ os.path.join(script_dir, "config.yaml"), # compatible location in script directory
35
+ "/app/config/config.yaml", # absolute path in Docker container
36
+ ]
37
+
38
+ for path in default_paths:
39
+ if os.path.isfile(path):
40
+ print(f"✅ Auto-discovered config file: {path}")
41
+ return path
42
+
43
+ # If none found, provide clear error message
44
+ print("❌ Config file not found, please check these locations:")
45
+ for path in default_paths:
46
+ print(f" - {path}")
47
+ raise FileNotFoundError("Config file does not exist")
48
+
49
+
50
+ def load_yaml(path):
51
+ if not os.path.isfile(path):
52
+ print(f"[ERROR] Config file not found: {path}", file=sys.stderr)
53
+ sys.exit(1)
54
+ try:
55
+ with open(path, "r", encoding="utf-8") as f:
56
+ return yaml.safe_load(f)
57
+ except Exception as e:
58
+ print(f"[ERROR] Failed to read YAML: {e}", file=sys.stderr)
59
+ sys.exit(1)
60
+
61
+
62
+ async def check_playwright_browsers_async():
63
+ try:
64
+ async with async_playwright() as p:
65
+ browser = await p.chromium.launch(headless=True)
66
+ await browser.close()
67
+ print("✅ Playwright browsers available (Async API startup successful)")
68
+ return True
69
+ except PlaywrightError as e:
70
+ print(f"⚠️ Playwright browsers unavailable (Async API failed): {e}")
71
+ return False
72
+ except Exception as e:
73
+ print(f"❌ Playwright check exception: {e}")
74
+ return False
75
+
76
+
77
+ def check_lighthouse_installation():
78
+ """Check if Lighthouse is properly installed."""
79
+ # Get project root directory and current working directory
80
+ script_dir = os.path.dirname(os.path.abspath(__file__))
81
+ current_dir = os.getcwd()
82
+
83
+ # Determine OS type, lighthouse is .cmd file on Windows
84
+ is_windows = os.name == "nt"
85
+ lighthouse_exe = "lighthouse.cmd" if is_windows else "lighthouse"
86
+
87
+ # Possible lighthouse paths (local installation priority)
88
+ lighthouse_paths = [
89
+ os.path.join(current_dir, "node_modules", ".bin", lighthouse_exe), # local installation in current directory
90
+ os.path.join(script_dir, "node_modules", ".bin", lighthouse_exe), # local installation in script directory
91
+ "lighthouse", # global installation path (fallback)
92
+ ]
93
+
94
+ # Add Docker path only in non-Windows environments
95
+ if not is_windows:
96
+ lighthouse_paths.insert(-1, os.path.join("/app", "node_modules", ".bin", "lighthouse"))
97
+
98
+ for lighthouse_path in lighthouse_paths:
99
+ try:
100
+ result = subprocess.run([lighthouse_path, "--version"], capture_output=True, text=True, timeout=10)
101
+ if result.returncode == 0:
102
+ version = result.stdout.strip()
103
+ path_type = "Local installation" if "node_modules" in lighthouse_path else "Global installation"
104
+ print(f"✅ Lighthouse installation successful, version: {version} ({path_type})")
105
+ return True
106
+ except subprocess.TimeoutExpired:
107
+ continue
108
+ except FileNotFoundError:
109
+ continue
110
+ except Exception:
111
+ continue
112
+
113
+ print("❌ Lighthouse not found, checked paths:")
114
+ for path in lighthouse_paths:
115
+ print(f" - {path}")
116
+ print("Please confirm Lighthouse is properly installed: `npm install lighthouse chrome-launcher`")
117
+ return False
118
+
119
+
120
+ def check_nuclei_installation():
121
+ """Check if Nuclei is properly installed."""
122
+ try:
123
+ # Check if nuclei command is available
124
+ result = subprocess.run(["nuclei", "-version"], capture_output=True, text=True, timeout=10)
125
+ if result.returncode == 0:
126
+ version = result.stdout.strip()
127
+ print(f"✅ Nuclei installation successful, version: {version}")
128
+ return True
129
+ else:
130
+ print(f"⚠️ Nuclei command execution failed: {result.stderr}")
131
+ return False
132
+ except subprocess.TimeoutExpired:
133
+ print("❌ Nuclei check timeout")
134
+ return False
135
+ except FileNotFoundError:
136
+ print("❌ Nuclei not installed or not in PATH")
137
+ return False
138
+ except Exception as e:
139
+ print(f"❌ Nuclei check exception: {e}")
140
+ return False
141
+
142
+
143
+ def validate_and_build_llm_config(cfg):
144
+ """Validate and build LLM configuration, environment variables take priority over config file."""
145
+ # Read from config file
146
+ llm_cfg_raw = cfg.get("llm_config", {})
147
+
148
+ # Environment variables take priority over config file
149
+ api_key = os.getenv("OPENAI_API_KEY") or llm_cfg_raw.get("api_key", "")
150
+ base_url = os.getenv("OPENAI_BASE_URL") or llm_cfg_raw.get("base_url", "")
151
+ model = llm_cfg_raw.get("model", "gpt-4o-mini")
152
+ # Sampling configuration: default temperature is 0.1; top_p not set by default
153
+ temperature = llm_cfg_raw.get("temperature", 0.1)
154
+ top_p = llm_cfg_raw.get("top_p")
155
+
156
+ # Validate required fields
157
+ if not api_key:
158
+ raise ValueError(
159
+ "❌ LLM API Key not configured! Please set one of the following:\n"
160
+ " - Environment variable: OPENAI_API_KEY\n"
161
+ " - Config file: llm_config.api_key"
162
+ )
163
+
164
+ if not base_url:
165
+ print("⚠️ base_url not set, will use OpenAI default address")
166
+ base_url = "https://api.openai.com/v1"
167
+
168
+ llm_config = {
169
+ "api": "openai",
170
+ "model": model,
171
+ "api_key": api_key,
172
+ "base_url": base_url,
173
+ "temperature": temperature,
174
+ }
175
+ if top_p is not None:
176
+ llm_config["top_p"] = top_p
177
+
178
+ # Show configuration source (hide sensitive information)
179
+ api_key_masked = f"{api_key[:8]}...{api_key[-4:]}" if len(api_key) > 12 else "***"
180
+ env_api_key = bool(os.getenv("OPENAI_API_KEY"))
181
+ env_base_url = bool(os.getenv("OPENAI_BASE_URL"))
182
+
183
+ print("✅ LLM configuration validation successful:")
184
+ print(f" - API Key: {api_key_masked} ({'Environment variable' if env_api_key else 'Config file'})")
185
+ print(f" - Base URL: {base_url} ({'Environment variable' if env_base_url else 'Config file/Default'})")
186
+ print(f" - Model: {model}")
187
+ print(f" - Temperature: {temperature}")
188
+ if top_p is not None:
189
+ print(f" - Top_p: {top_p}")
190
+
191
+ return llm_config
192
+
193
+
194
+ def build_test_configurations(cfg, cookies=None):
195
+ tests = []
196
+ tconf = cfg.get("test_config", {})
197
+
198
+ # Docker environment detection: force headless mode
199
+ is_docker = os.getenv("DOCKER_ENV") == "true"
200
+ config_headless = cfg.get("browser_config", {}).get("headless", True)
201
+
202
+ if is_docker and not config_headless:
203
+ print("⚠️ Docker environment detected, forcing headless mode")
204
+ headless = True
205
+ else:
206
+ headless = config_headless
207
+
208
+ base_browser = {
209
+ "viewport": cfg.get("browser_config", {}).get("viewport", {"width": 1280, "height": 720}),
210
+ "headless": headless,
211
+ }
212
+
213
+ # function test
214
+ if tconf.get("function_test", {}).get("enabled"):
215
+
216
+ if tconf["function_test"].get("type") == "ai":
217
+ tests.append(
218
+ {
219
+ "test_type": "ui_agent_langgraph",
220
+ "enabled": True,
221
+ "browser_config": base_browser,
222
+ "test_specific_config": {
223
+ "cookies": cookies,
224
+ "business_objectives": tconf["function_test"].get("business_objectives", ""),
225
+ },
226
+ }
227
+ )
228
+ else:
229
+ tests += [
230
+ {
231
+ "test_type": "basic_test",
232
+ "enabled": True,
233
+ "browser_config": base_browser,
234
+ "test_specific_config": {},
235
+ }
236
+ ]
237
+
238
+ # ux test
239
+ if tconf.get("ux_test", {}).get("enabled"):
240
+ tests.append(
241
+ {
242
+ "test_type": "ux_test",
243
+ "enabled": True,
244
+ "browser_config": base_browser,
245
+ "test_specific_config": {},
246
+ }
247
+ )
248
+
249
+ # performance test
250
+ if tconf.get("performance_test", {}).get("enabled"):
251
+ tests.append(
252
+ {
253
+ "test_type": "performance",
254
+ "enabled": True,
255
+ "browser_config": base_browser,
256
+ "test_specific_config": {},
257
+ }
258
+ )
259
+
260
+ # security test
261
+ if tconf.get("security_test", {}).get("enabled"):
262
+ tests.append(
263
+ {
264
+ "test_type": "security",
265
+ "enabled": True,
266
+ "browser_config": base_browser,
267
+ "test_specific_config": {},
268
+ }
269
+ )
270
+
271
+ return tests
272
+
273
+
274
+ async def run_tests(cfg):
275
+ # 0. Display runtime environment information
276
+ is_docker = os.getenv("DOCKER_ENV") == "true"
277
+ print(f"🏃 Runtime environment: {'Docker container' if is_docker else 'Local environment'}")
278
+ if is_docker:
279
+ print("🐳 Docker mode: automatically enable headless browser")
280
+
281
+ # 1. Check required tools based on configuration
282
+ tconf = cfg.get("test_config", {})
283
+
284
+ # Display enabled test types
285
+ enabled_tests = []
286
+ if tconf.get("function_test", {}).get("enabled"):
287
+ test_type = tconf.get("function_test", {}).get("type", "default")
288
+ enabled_tests.append(f"Function Test ({test_type})")
289
+ if tconf.get("ux_test", {}).get("enabled"):
290
+ enabled_tests.append("User Experience Test")
291
+ if tconf.get("performance_test", {}).get("enabled"):
292
+ enabled_tests.append("Performance Test")
293
+ if tconf.get("security_test", {}).get("enabled"):
294
+ enabled_tests.append("Security Test")
295
+
296
+ if enabled_tests:
297
+ print(f"📋 Enabled test types: {', '.join(enabled_tests)}")
298
+ print("🔧 Checking required tools based on configuration...")
299
+ else:
300
+ print("⚠️ No test types enabled, please check configuration file")
301
+ sys.exit(1)
302
+
303
+ # Check if browser is needed (most tests require it)
304
+ needs_browser = any(
305
+ [
306
+ tconf.get("function_test", {}).get("enabled"),
307
+ tconf.get("ux_test", {}).get("enabled"),
308
+ tconf.get("performance_test", {}).get("enabled"),
309
+ tconf.get("security_test", {}).get("enabled"),
310
+ ]
311
+ )
312
+
313
+ if needs_browser:
314
+ print("🔍 Checking Playwright browsers...")
315
+ ok = await check_playwright_browsers_async()
316
+ if not ok:
317
+ print("Please manually run: `playwright install` to install browser binaries, then retry.", file=sys.stderr)
318
+ sys.exit(1)
319
+
320
+ # Check if Lighthouse is needed (performance test)
321
+ if tconf.get("performance_test", {}).get("enabled"):
322
+ print("🔍 Checking Lighthouse installation...")
323
+ lighthouse_ok = check_lighthouse_installation()
324
+ if not lighthouse_ok:
325
+ print("Please confirm Lighthouse is properly installed: `npm install lighthouse chrome-launcher`", file=sys.stderr)
326
+ sys.exit(1)
327
+
328
+ # Check if Nuclei is needed (security test)
329
+ if tconf.get("security_test", {}).get("enabled"):
330
+ print("🔍 Checking Nuclei installation...")
331
+ nuclei_ok = check_nuclei_installation()
332
+ if not nuclei_ok:
333
+ print("Please confirm Nuclei is properly installed and in PATH", file=sys.stderr)
334
+ sys.exit(1)
335
+
336
+ # Validate and build LLM configuration
337
+ try:
338
+ llm_config = validate_and_build_llm_config(cfg)
339
+ except ValueError as e:
340
+ print(f"[ERROR] {e}", file=sys.stderr)
341
+ sys.exit(1)
342
+
343
+ # Build test_configurations
344
+ cookies = []
345
+ test_configurations = build_test_configurations(cfg, cookies=cookies)
346
+
347
+ target_url = cfg.get("target", {}).get("url", "")
348
+
349
+ # Call executor
350
+ try:
351
+ # Read concurrency from config (default 2), allow users to specify in config.target.max_concurrent_tests
352
+ raw_concurrency = cfg.get("target", {}).get("max_concurrent_tests", 2)
353
+ try:
354
+ max_concurrent_tests = int(raw_concurrency)
355
+ if max_concurrent_tests < 1:
356
+ raise ValueError
357
+ except Exception:
358
+ print(f"⚠️ Invalid concurrency setting: {raw_concurrency}, fallback to 2")
359
+ max_concurrent_tests = 2
360
+
361
+ print(f"⚙️ Concurrency: {max_concurrent_tests}")
362
+
363
+ parallel_mode = ParallelMode([], max_concurrent_tests=max_concurrent_tests)
364
+ results, report_path, html_report_path, result_count = await parallel_mode.run(
365
+ url=target_url, llm_config=llm_config, test_configurations=test_configurations,
366
+ log_cfg=cfg.get("log", {"level": "info"}),
367
+ report_cfg=cfg.get("report", {"language": "en-US"})
368
+ )
369
+ if result_count:
370
+ print(f"🔢 Total evaluations: {result_count.get('total', 0)}")
371
+ print(f"✅ Passed: {result_count.get('passed', 0)}")
372
+ print(f"❌ Failed: {result_count.get('failed', 0)}")
373
+
374
+ if html_report_path:
375
+ print("HTML report path: ", html_report_path)
376
+ else:
377
+ print("HTML report generation failed")
378
+ except Exception:
379
+ print("Test execution failed, stack trace:", file=sys.stderr)
380
+ traceback.print_exc()
381
+ sys.exit(1)
382
+
383
+
384
def parse_args():
    """Parse the command-line options accepted by the WebQA Agent entry point."""
    cli = argparse.ArgumentParser(description="WebQA Agent Test Entry Point")
    cli.add_argument(
        "--config",
        "-c",
        help="YAML configuration file path (optional, default auto-search config/config.yaml)",
    )
    return cli.parse_args()
388
+
389
+
390
def main():
    """CLI entry point: resolve the config file, load it, and run the tests."""
    cli_args = parse_args()

    # Locate and parse the YAML configuration; abort cleanly when it is absent.
    try:
        cfg = load_yaml(find_config_file(cli_args.config))
    except FileNotFoundError as e:
        print(f"[ERROR] {e}", file=sys.stderr)
        sys.exit(1)

    # Run tests
    asyncio.run(run_tests(cfg))
403
+
404
+
405
# Standard script entry guard: keeps the module importable without side effects.
if __name__ == "__main__":
    main()
webqa_agent/__init__.py ADDED
File without changes
webqa_agent/actions/__init__.py ADDED
File without changes
webqa_agent/actions/action_executor.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ from typing import Dict, List, Optional
4
+
5
+
6
class ActionExecutor:
    """Dispatch high-level action dictionaries to an ActionHandler.

    Each action is a dict with at least a ``type`` key (e.g. ``"Tap"``) plus
    optional ``locate``/``param`` sub-dicts. ``execute`` looks the type up in
    an internal dispatch table. Result shape:

    - a dict ``{"success": bool, "message": str, ...}`` for executed actions;
    - ``False`` for malformed actions (missing or unknown ``type``).

    Callers rely on both shapes, so they are preserved as-is.
    """

    def __init__(self, action_handler):
        # Low-level handler (ActionHandler) that drives the browser page.
        self._actions = action_handler
        # Dispatch table: action type -> coroutine that executes it.
        self._action_map = {
            "Tap": self._execute_tap,
            "Hover": self._execute_hover,
            "Sleep": self._execute_sleep,
            "Input": self._execute_input,
            "Clear": self._execute_clear,
            "Scroll": self._execute_scroll,
            "KeyboardPress": self._execute_keyboard_press,
            "FalsyConditionStatement": self._execute_falsy,
            "Check": self._execute_check,
            "GetNewPage": self._execute_get_new_page,
            "Upload": self._execute_upload,
            "SelectDropdown": self._execute_select_dropdown,
            "Drag": self._execute_drag,
            "GoToPage": self._execute_go_to_page,  # Added missing action
            "GoBack": self._execute_go_back,  # Added browser back navigation
        }

    async def initialize(self):
        """No-op async initializer kept for interface symmetry; returns self."""
        return self

    async def execute(self, action):
        """Validate *action* and run it through the dispatch table.

        Args:
            action: Action dict; must contain a ``type`` key.

        Returns:
            ``False`` when the action has no/unknown type, otherwise the
            result dict produced by the matching ``_execute_*`` method.
        """
        try:
            # Validate the action
            action_type = action.get("type")
            if not action_type:
                logging.error("Action type is required")
                return False

            # Get the corresponding execution function
            execute_func = self._action_map.get(action_type)
            if not execute_func:
                logging.error(f"Unknown action type: {action_type}")
                return False

            # Execute the action
            logging.debug(f"Executing action: {action_type}")
            return await execute_func(action)

        except Exception as e:
            logging.error(f"Action execution failed: {str(e)}")
            return {"success": False, "message": f"Action execution failed with an exception: {e}"}

    def _validate_params(self, action, required_params):
        """Check that each dotted path in *required_params* resolves to a non-None value.

        ``Scroll`` actions may omit ``distance`` (a default is computed
        downstream), so that specific miss is tolerated.

        Returns:
            True when every required parameter is present, else False.
        """
        for param in required_params:
            keys = param.split(".")
            value = action
            for key in keys:
                # Guard against non-dict intermediates so malformed actions
                # fail validation instead of raising AttributeError.
                value = value.get(key) if isinstance(value, dict) else None
                if value is None:
                    if action.get("type") == "Scroll" and key == "distance":
                        continue
                    logging.error(f"Missing required parameter: {param}")
                    return False  # Return False to indicate validation failure
        return True  # Return True if all parameters are present

    # Individual action execution methods - NO SCREENSHOTS
    async def _execute_clear(self, action):
        """Execute clear action on an input field."""
        if not self._validate_params(action, ["locate.id"]):
            return {"success": False, "message": "Missing locate.id for clear action"}
        success = await self._actions.clear(action.get("locate").get("id"))
        if success:
            return {"success": True, "message": "Clear action successful."}
        else:
            return {"success": False, "message": "Clear action failed. The element might not be clearable."}

    async def _execute_tap(self, action):
        """Execute tap/click action."""
        if not self._validate_params(action, ["locate.id"]):
            return {"success": False, "message": "Missing locate.id for tap action"}
        success = await self._actions.click(action.get("locate").get("id"))
        if success:
            return {"success": True, "message": "Tap action successful."}
        else:
            return {"success": False, "message": "Tap action failed. The element might not be clickable."}

    async def _execute_hover(self, action):
        """Execute hover action."""
        if not self._validate_params(action, ["locate.id"]):
            return {"success": False, "message": "Missing locate.id for hover action"}
        success = await self._actions.hover(action.get("locate").get("id"))
        if success:
            return {"success": True, "message": "Hover action successful."}
        else:
            return {"success": False, "message": "Hover action failed. The element might not be hoverable."}

    async def _execute_sleep(self, action):
        """Execute sleep/wait action. ``param.timeMs`` is in milliseconds."""
        if not self._validate_params(action, ["param.timeMs"]):
            return {"success": False, "message": "Missing param.timeMs for sleep action"}
        time_ms = action.get("param").get("timeMs")
        await asyncio.sleep(time_ms / 1000)
        return {"success": True, "message": f"Slept for {time_ms}ms."}

    async def _execute_input(self, action):
        """Execute input/type action; optionally clears the field first."""
        if not self._validate_params(action, ["locate.id", "param.value"]):
            return {"success": False, "message": "Missing locate.id or param.value for input action"}
        try:
            value = action.get("param").get("value")
            clear_before_type = action.get("param").get("clear_before_type", False)  # Default is False
            success = await self._actions.type(
                action.get("locate").get("id"), value, clear_before_type=clear_before_type
            )
            if success:
                return {"success": True, "message": "Input action successful."}
            else:
                return {
                    "success": False,
                    "message": "Input action failed. The element might not be available for typing.",
                }
        except Exception as e:
            logging.error(f"Action '_execute_input' execution failed: {str(e)}")
            return {"success": False, "message": f"Input action failed with an exception: {e}"}

    async def _execute_scroll(self, action):
        """Execute scroll action (distance may be omitted; handler picks a default)."""
        if not self._validate_params(action, ["param.direction", "param.scrollType", "param.distance"]):
            return {"success": False, "message": "Missing parameters for scroll action"}
        direction = action.get("param").get("direction", "down")
        scroll_type = action.get("param").get("scrollType", "once")
        distance = action.get("param").get("distance", None)

        success = await self._actions.scroll(direction, scroll_type, distance)
        if success:
            return {"success": True, "message": f"Scrolled {direction} successfully."}
        else:
            return {"success": False, "message": "Scroll action failed."}

    async def _execute_keyboard_press(self, action):
        """Execute keyboard press action (``param.value`` is the key name)."""
        if not self._validate_params(action, ["param.value"]):
            return {"success": False, "message": "Missing param.value for keyboard press action"}
        success = await self._actions.keyboard_press(action.get("param").get("value"))
        if success:
            return {"success": True, "message": "Keyboard press successful."}
        else:
            return {"success": False, "message": "Keyboard press failed."}

    async def _execute_falsy(self, action):
        """Execute falsy condition statement (no-op acknowledgement)."""
        return {"success": True, "message": "Falsy condition met."}

    async def _execute_check(self, action):
        """Execute check action (no-op acknowledgement)."""
        return {"success": True, "message": "Check action completed."}

    async def _execute_get_new_page(self, action):
        """Execute get new page action (switch to a newly opened tab/page)."""
        success = await self._actions.get_new_page()
        if success:
            return {"success": True, "message": "Successfully switched to new page."}
        else:
            return {"success": False, "message": "Failed to get new page."}

    async def _execute_upload(self, action):
        """Execute upload action.

        Bug fix: this method previously took a second ``file_path`` argument,
        but the dispatch table calls every handler as ``func(action)``, so any
        Upload action raised TypeError. The file path is now read from
        ``param.file_path`` like every other action parameter.
        """
        if not self._validate_params(action, ["locate.id"]):
            return {"success": False, "message": "Missing locate.id for upload action"}
        file_path = action.get("param", {}).get("file_path")
        if file_path is None:
            return {"success": False, "message": "Missing param.file_path for upload action"}
        success = await self._actions.upload_file(action.get("locate").get("id"), file_path)
        if success:
            return {"success": True, "message": "File upload successful."}
        else:
            return {"success": False, "message": "File upload failed."}

    async def _execute_select_dropdown(self, action):
        """Execute select dropdown action (single-level or cascading)."""
        locate = action.get("locate", {})
        dropdown_id = locate.get("dropdown_id")
        option_id = locate.get("option_id")
        selection_path_param = action.get("param", {}).get("selection_path")

        if dropdown_id is None or selection_path_param is None:
            logging.error("dropdown_id and selection_path are required for SelectDropdown")
            return {"success": False, "message": "dropdown_id and selection_path are required for SelectDropdown"}

        # Normalize selection_path to a non-empty list of option texts.
        if isinstance(selection_path_param, str):
            selection_path = [selection_path_param]
        elif isinstance(selection_path_param, list) and selection_path_param:
            selection_path = selection_path_param
        else:
            logging.error("selection_path must be a non-empty string or list")
            return {"success": False, "message": "selection_path must be a non-empty string or list"}

        try:
            # choose option_id directly
            if option_id is not None and len(selection_path) == 1:
                logging.debug(f"Directly clicking option_id {option_id} for dropdown_id {dropdown_id}")
                return await self._actions.select_dropdown_option(dropdown_id, selection_path[0], option_id=option_id)

            # multi-level cascade or no option_id, use original logic
            if len(selection_path) == 1:
                return await self._execute_simple_selection(dropdown_id, selection_path[0])
            else:
                # multi-level cascade: select one level at a time, pausing
                # briefly between levels so the next panel can render.
                for level, option_text in enumerate(selection_path):
                    select_result = await self._actions.select_cascade_level(dropdown_id, option_text, level=level)
                    if not select_result.get("success"):
                        logging.error(f"Failed to select level {level} option: {select_result.get('message')}")
                        return {
                            "success": False,
                            "message": f"Failed at cascade level {level}: {select_result.get('message')}",
                        }
                    if level < len(selection_path) - 1:
                        await asyncio.sleep(0.5)
                logging.debug(f"Successfully completed cascade selection: {' -> '.join(selection_path)}")
                return {"success": True, "message": "Cascade selection completed successfully"}

        except Exception as e:
            logging.error(f"Error in dropdown selection: {str(e)}")
            return {"success": False, "message": f"An exception occurred during dropdown selection: {str(e)}"}

    async def _execute_simple_selection(self, element_id, option_text):
        """Execute simple single-level dropdown selection.

        Fetches the dropdown's options, picks the best textual match for
        *option_text* (exact, then contains, then partial), and selects it.
        """
        try:
            # get all options of dropdown
            logging.debug(f"Getting dropdown options for element {element_id}")
            options_result = await self._actions.get_dropdown_options(element_id)

            if not options_result.get("success"):
                logging.error(f"Failed to get dropdown options: {options_result.get('message')}")
                return {"success": False, "message": f"Failed to get dropdown options: {options_result.get('message')}"}

            options = options_result.get("options", [])
            if not options:
                logging.error("No options found in dropdown")
                return {"success": False, "message": "No options found in dropdown"}

            logging.debug(f"Found {len(options)} options in dropdown")

            # use default simple decision logic
            def _default_selection_logic(options: List[Dict], criteria: str) -> Optional[str]:
                criteria_lower = criteria.lower()

                # 1) exact (case-insensitive) match
                for option in options:
                    if option["text"].lower() == criteria_lower:
                        logging.debug(f"Found exact match: {option['text']}")
                        return option["text"]

                # 2) option text contains the criteria
                for option in options:
                    if criteria_lower in option["text"].lower():
                        logging.debug(f"Found contains match: {option['text']}")
                        return option["text"]

                # 3) criteria contains the option text
                for option in options:
                    if option["text"].lower() in criteria_lower:
                        logging.debug(f"Found partial match: {option['text']}")
                        return option["text"]

                # if no match, return None
                logging.warning(f"No match found for criteria: {criteria}")
                return None

            selected_option = _default_selection_logic(options, option_text)

            if not selected_option:
                logging.error(f"Could not decide which option to select based on criteria: {option_text}")
                available_options = [opt["text"] for opt in options]
                logging.debug(f"Available options: {available_options}")
                return {"success": False, "message": "No matching option found", "available_options": available_options}

            logging.debug(f"Selected option: {selected_option}")

            # execute select operation
            select_result = await self._actions.select_dropdown_option(element_id, selected_option)

            if select_result.get("success"):
                logging.debug(f"Successfully completed dropdown selection: {selected_option}")
                return {"success": True, "message": "Option selected successfully"}
            else:
                logging.error(f"Failed to select option: {selected_option}")
                return {"success": False, "message": f"Failed to select option: {select_result.get('message')}"}

        except Exception as e:
            logging.error(f"Error in simple dropdown selection: {str(e)}")
            return {"success": False, "message": f"An exception occurred: {str(e)}"}

    async def _execute_drag(self, action):
        """Execute drag action between two coordinate points."""
        if not self._validate_params(action, ["param.sourceCoordinates", "param.targetCoordinates"]):
            return {"success": False, "message": "Missing coordinates for drag action"}
        success = await self._actions.drag(
            action.get("param").get("sourceCoordinates"), action.get("param").get("targetCoordinates")
        )
        if success:
            return {"success": True, "message": "Drag action successful."}
        else:
            return {"success": False, "message": "Drag action failed."}

    async def _execute_go_to_page(self, action):
        """Execute go to page action - the missing navigation action."""
        url = action.get("param", {}).get("url")
        if not url:
            return {"success": False, "message": "Missing URL parameter for go to page action"}

        try:
            # Use smart navigation if available (skips redundant navigations)
            if hasattr(self._actions, 'smart_navigate_to_page'):
                page = getattr(self._actions, 'page', None)
                if page:
                    navigation_performed = await self._actions.smart_navigate_to_page(page, url)
                    message = "Navigated to page" if navigation_performed else "Already on target page"
                    return {"success": True, "message": message}

            # Fallback to regular navigation
            if hasattr(self._actions, 'go_to_page') and hasattr(self._actions, 'page'):
                await self._actions.go_to_page(self._actions.page, url)
                return {"success": True, "message": "Successfully navigated to page"}

            return {"success": False, "message": "Navigation method not available"}

        except Exception as e:
            logging.error(f"Go to page action failed: {str(e)}")
            return {"success": False, "message": f"Navigation failed: {str(e)}", "playwright_error": str(e)}

    async def _execute_go_back(self, action):
        """Execute browser back navigation action."""
        try:
            if hasattr(self._actions, 'go_back'):
                success = await self._actions.go_back()
                if success:
                    return {"success": True, "message": "Successfully navigated back to previous page"}
                else:
                    return {"success": False, "message": "Go back navigation failed"}
            else:
                return {"success": False, "message": "Go back action not supported by action handler"}
        except Exception as e:
            logging.error(f"Go back action failed: {str(e)}")
            return {"success": False, "message": f"Go back failed: {str(e)}", "playwright_error": str(e)}
webqa_agent/actions/action_handler.py ADDED
@@ -0,0 +1,1431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import os
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+ from playwright.async_api import Page
9
+
10
+ from webqa_agent.browser.driver import *
11
+
12
+
13
+ class ActionHandler:
14
    def __init__(self):
        """Create an ActionHandler with no page or driver attached yet."""
        self.page_data = {}  # per-page scratch data (usage not visible here -- TODO confirm)
        self.page_element_buffer = {}  # page element buffer: element id -> metadata, consumed by click()
        self.driver = None  # browser driver, set via initialize()
        self.page = None  # active Playwright Page, set via initialize()/go_to_page()
19
+
20
+ async def initialize(self, page: Page | None = None, driver=None):
21
+ if page is not None:
22
+ self.page = page
23
+ if driver is not None:
24
+ self.driver = driver
25
+ return self
26
+ return self
27
+
28
    async def update_element_buffer(self, new_element):
        """Replace the cached page element buffer.

        Args:
            new_element: Latest element buffer fetched by the crawler
                (mapping of element id -> element metadata).
        """
        self.page_element_buffer = new_element
32
+
33
+ async def go_to_page(self, page: Page, url: str, cookies=None):
34
+ # if not self.driver:
35
+ # self.driver = await Driver.getInstance()
36
+ self.page = page
37
+ if cookies:
38
+ try:
39
+ cookies = json.loads(cookies)
40
+ await self.page.context.add_cookies(cookies)
41
+ except Exception as e:
42
+ raise Exception(f'add context cookies error: {e}')
43
+
44
+ await self.page.goto(url=url, wait_until='domcontentloaded')
45
+ await self.page.wait_for_load_state('networkidle', timeout=60000)
46
+
47
    async def smart_navigate_to_page(self, page: Page, url: str, cookies=None) -> bool:
        """Smart navigation to target page, avoiding redundant navigation.

        Compares the current URL against the target (after normalization) and
        only calls ``go_to_page`` when they differ. Any failure in the checks
        falls back to a regular navigation.

        Args:
            page: Playwright page object
            url: Target URL
            cookies: Optional cookies (forwarded to ``go_to_page``)

        Returns:
            bool: Whether navigation operation was performed
        """
        try:
            # Get current page URL
            current_url = page.url
            logging.debug(f'Smart navigation check - Current URL: {current_url}, Target URL: {url}')

            # Enhanced URL normalization function to handle various domain variations
            def normalize_url(u):
                from urllib.parse import urlparse

                try:
                    parsed = urlparse(u)
                    # Handle domain variations: remove www prefix, unify lowercase
                    netloc = parsed.netloc.lower()
                    if netloc.startswith('www.'):
                        netloc = netloc[4:]  # Remove www.

                    # Standardize path: remove trailing slash
                    path = parsed.path.rstrip('/')

                    # Build normalized URL (query string and fragment are
                    # intentionally dropped from the comparison)
                    normalized = f'{parsed.scheme}://{netloc}{path}'
                    return normalized
                except Exception:
                    # If parsing fails, return lowercase version of original URL
                    return u.lower()

            current_normalized = normalize_url(current_url)
            target_normalized = normalize_url(url)

            logging.debug(f'Normalized URLs - Current: {current_normalized}, Target: {target_normalized}')

            if current_normalized == target_normalized:
                logging.debug('Already on target page (normalized match), skipping navigation')
                return False

            # More flexible URL matching: if domain is same and path is similar, also consider as match
            def extract_domain(u):
                try:
                    from urllib.parse import urlparse

                    parsed = urlparse(u)
                    domain = parsed.netloc.lower()
                    if domain.startswith('www.'):
                        domain = domain[4:]
                    return domain
                except Exception:
                    return ''

            def extract_path(u):
                try:
                    from urllib.parse import urlparse

                    parsed = urlparse(u)
                    return parsed.path.rstrip('/')
                except Exception:
                    return ''

            current_domain = extract_domain(current_url)
            target_domain = extract_domain(url)
            current_path = extract_path(current_url)
            target_path = extract_path(url)

            # If domain is same and path is exactly same, or homepage variant.
            # NOTE(review): `and` binds tighter than `or`, so this groups as
            # intended pairs; however extract_path() strips trailing slashes,
            # so the branches comparing against '/' can never fire — they are
            # dead but harmless.
            if current_domain == target_domain and (
                current_path == target_path
                or current_path == ''
                and target_path == ''
                or current_path == '/'
                and target_path == ''
                or current_path == ''
                and target_path == '/'
            ):
                logging.debug(f'Domain and path match detected ({current_domain}{current_path}), skipping navigation')
                return False

            # Check if page is still valid
            try:
                await page.title()  # Simple check if page responds
                logging.debug(f'Page is responsive, proceeding with navigation from {current_url} to {url}')
            except Exception as e:
                # Non-fatal: an unresponsive page still gets navigated below.
                logging.warning(f'Page check failed: {e}, forcing navigation')

            # Need to perform navigation
            await self.go_to_page(page, url, cookies)
            logging.debug(f'Successfully navigated to {url}')
            return True

        except Exception as e:
            logging.error(f'Smart navigation failed: {e}, falling back to regular navigation')
            # Perform regular navigation on error
            await self.go_to_page(page, url, cookies)
            return True
150
+
151
    async def set_overflow_hidden(self):
        """Disable page scrolling by setting ``body.style.overflow`` to 'hidden'."""
        await self.page.evaluate("document.body.style.overflow = 'hidden'")
153
+
154
+ async def close_page(self) -> None:
155
+ """Close the current page."""
156
+ if self.page:
157
+ try:
158
+ await self.page.close()
159
+ logging.debug('Page closed successfully')
160
+ except Exception as e:
161
+ logging.error(f'Error closing page: {e}')
162
+
163
    def set_page_element_buffer(self, element_buffer: Dict[int, Dict]) -> None:
        """Set the page element buffer for action execution.

        Synchronous twin of ``update_element_buffer``; both simply replace
        ``self.page_element_buffer`` wholesale.
        """
        self.page_element_buffer = element_buffer
166
+
167
    async def scroll(self, direction: str = 'down', scrollType: str = 'once', distance: Optional[int] = None) -> bool:
        """Scroll page.

        Args:
            direction: 'up' or 'down'
            scrollType: 'once' or 'untilBottom' or 'untilTop'
            distance: None or Number (pixels; None/0 defaults to half the
                viewport height)

        Returns:
            bool: Whether scroll operation was performed (False on invalid input)
        """
        logging.debug('Start scrolling page')

        # Validate inputs to avoid silent no-ops
        allowed_directions = {'up', 'down'}
        allowed_scroll_types = {'once', 'untilBottom', 'untilTop'}

        if direction not in allowed_directions:
            logging.error(f"Invalid direction '{direction}'. Allowed: {sorted(list(allowed_directions))}")
            return False

        if scrollType not in allowed_scroll_types:
            logging.error(f"Invalid scrollType '{scrollType}'. Allowed: {sorted(list(allowed_scroll_types))}")
            return False

        if distance is not None:
            try:
                distance = int(distance)
            except (TypeError, ValueError):
                logging.error(f"Invalid distance '{distance}'. Must be an integer or None")
                return False
            if distance < 0:
                logging.error(f"Invalid distance '{distance}'. Must be >= 0")
                return False

        # Closure reads `distance` from the enclosing scope at call time, so
        # the rebindings below (default + per-loop adjustments) take effect.
        async def perform_scroll():  # Execute scroll operation
            if direction == 'up':
                await self.page.evaluate(f'(document.scrollingElement || document.body).scrollTop -= {distance};')
            elif direction == 'down':
                await self.page.evaluate(f'(document.scrollingElement || document.body).scrollTop += {distance};')

        # None or 0 -> default to half the viewport height.
        if not distance:
            distance = int(await self.page.evaluate('window.innerHeight') / 2)
            logging.debug(f'Scrolling distance: {distance}')

        if scrollType == 'once':
            await perform_scroll()
            return True

        elif scrollType == 'untilBottom':
            prev_scroll = -1  # Record last scroll position, avoid stuck

            while True:
                # Get current scroll position and page total height
                current_scroll = await self.page.evaluate('window.scrollY')
                current_scroll_height = await self.page.evaluate('document.body.scrollHeight')

                # Check if page is scrolled to the bottom
                if current_scroll == prev_scroll:
                    logging.debug('No further scroll possible, reached the bottom.')
                    break

                # Until bottom: clamp the final step so we stop at the end
                if current_scroll + distance >= current_scroll_height:
                    distance = current_scroll_height - current_scroll
                    logging.debug(f'Adjusting last scroll distance to {distance}')

                prev_scroll = current_scroll
                await perform_scroll()
                # Give lazy-loaded content a moment to render between steps.
                await asyncio.sleep(1)

            return True

        elif scrollType == 'untilTop':
            prev_scroll = -1

            while True:
                current_scroll = await self.page.evaluate('window.scrollY')

                # If already at top or no progress, stop
                if current_scroll <= 0 or current_scroll == prev_scroll:
                    logging.debug('No further scroll possible, reached the top.')
                    break

                # Adjust last scroll to not go past top
                if current_scroll - distance <= 0:
                    distance = current_scroll
                    logging.debug(f'Adjusting last scroll distance to {distance}')

                prev_scroll = current_scroll
                await perform_scroll()
                await asyncio.sleep(1)

            return True
260
+
261
+ async def click(self, id) -> bool:
262
+ # Inject JavaScript into the page to remove the target attribute from all links
263
+ js = """
264
+ links = document.getElementsByTagName("a");
265
+ for (var i = 0; i < links.length; i++) {
266
+ links[i].removeAttribute("target");
267
+ }
268
+ """
269
+ await self.page.evaluate(js)
270
+
271
+ try:
272
+ id = str(id)
273
+ element = self.page_element_buffer.get(id)
274
+ if not element:
275
+ logging.error(f'Element with id {id} not found in buffer for click action.')
276
+ return False
277
+
278
+ logging.debug(
279
+ f"Attempting to click element: id={id}, tagName='{element.get('tagName')}', innerText='{element.get('innerText', '').strip()[:50]}', selector='{element.get('selector')}'"
280
+ )
281
+
282
+ except Exception as e:
283
+ logging.error(f'failed to get element {id}, element: {self.page_element_buffer.get(id)}, error: {e}')
284
+ return False
285
+
286
+ return await self.click_using_coordinates(element, id)
287
+
288
+ async def click_using_coordinates(self, element, id) -> bool:
289
+ """Helper function to click using coordinates."""
290
+ x = element.get('center_x')
291
+ y = element.get('center_y')
292
+ try:
293
+ if x is not None and y is not None:
294
+ logging.debug(f'mouse click at element {id}, coordinate=({x}, {y})')
295
+ try:
296
+ await self.page.mouse.click(x, y)
297
+ except Exception as e:
298
+ logging.error(f'mouse click error: {e}\nwith coordinates: ({x}, {y})')
299
+ return True
300
+ else:
301
+ logging.error('Coordinates not found in element data')
302
+ return False
303
+ except Exception as e:
304
+ logging.error(f'Error clicking using coordinates: {e}')
305
+ return False
306
+
307
    async def hover(self, id) -> bool:
        """Move the mouse pointer over the buffered element identified by ``id``.

        Args:
            id: element id; looked up (as a string) in ``self.page_element_buffer``.

        Returns:
            bool: True if the pointer was moved, False if the element or its
            coordinates are missing.
        """
        element = self.page_element_buffer.get(str(id))
        if not element:
            logging.error(f'Element with id {id} not found in buffer for hover action.')
            return False

        logging.debug(
            f"Attempting to hover over element: id={id}, tagName='{element.get('tagName')}', innerText='{element.get('innerText', '').strip()[:50]}', selector='{element.get('selector')}'"
        )

        scroll_y = await self.page.evaluate('() => window.scrollY')

        x = element.get('center_x')
        y = element.get('center_y')
        if x is not None and y is not None:
            # NOTE(review): y is converted from document-space to viewport-space
            # by subtracting the scroll offset, but click_using_coordinates does
            # not do this adjustment — confirm which coordinate space the buffer
            # actually stores.
            y = y - scroll_y
            logging.debug(f'mouse hover at ({x}, {y})')
            await self.page.mouse.move(x, y)
            # Brief pause so hover-triggered UI (tooltips, menus) can appear.
            await asyncio.sleep(0.5)
            return True
        else:
            logging.error('Coordinates not found in element data')
            return False
330
+
331
+ async def wait(self, timeMs) -> bool:
332
+ """Wait for specified time.
333
+
334
+ Args:
335
+ timeMs: wait time (milliseconds)
336
+
337
+ Returns:
338
+ bool: True if success, False if failed
339
+ """
340
+ logging.debug(f'wait for {timeMs} milliseconds')
341
+ await asyncio.sleep(timeMs / 1000)
342
+ logging.debug(f'wait for {timeMs} milliseconds done')
343
+ return True
344
+
345
    async def type(self, id, text, clear_before_type: bool = False) -> bool:
        """Types text into the specified element, optionally clearing it
        first.

        The element is focused via a coordinate click, then filled through its
        CSS selector, falling back to its XPath when the CSS selector is
        invalid or the fill fails.

        Args:
            id: element id; looked up (as a string) in ``self.page_element_buffer``.
            text: text to fill into the element.
            clear_before_type: when True, attempt to clear the field first
                (failure to clear is logged but typing still proceeds).

        Returns:
            bool: True if the text was filled, False on any failure.
        """
        try:
            element = self.page_element_buffer.get(str(id))
            if not element:
                logging.error(f'Element with id {id} not found in buffer for type action.')
                return False

            logging.debug(
                f"Attempting to type into element: id={id}, tagName='{element.get('tagName')}', innerText='{element.get('innerText', '').strip()[:50]}', selector='{element.get('selector')}', clear_before_type={clear_before_type}"
            )

            if clear_before_type:
                # Best-effort clear; a failed clear does not abort the type.
                if not await self.clear(id):
                    logging.warning(f'Failed to clear element {id} before typing, but will attempt to type anyway.')

            # click element to get focus
            try:
                if not await self.click(str(id)):
                    return False
            except Exception as e:
                logging.error(f"Error 'type' clicking using coordinates: {e}")
                # NOTE: bare type() below is the builtin, not this method.
                logging.error(f'id type {type(id)}, id: {id}')
                return False

            await asyncio.sleep(1)
            # Type text with CSS validation and XPath fallback
            selector = element['selector']

            # First validate CSS selector format
            if self._is_valid_css_selector(selector):
                try:
                    # Try using CSS selector
                    await self.page.locator(selector).fill(text)
                    logging.debug(f"Typed '{text}' into element {id} using CSS selector: {selector}")
                except Exception as css_error:
                    logging.warning(f'CSS selector type failed for element {id}: {css_error}')
                    # CSS selector failed, try XPath
                    xpath = element.get('xpath')
                    if xpath:
                        try:
                            await self.page.locator(f'xpath={xpath}').fill(text)
                            logging.debug(f"Typed '{text}' into element {id} using XPath fallback: {xpath}")
                        except Exception as xpath_error:
                            logging.error(
                                f'Both CSS and XPath type failed for element {id}. CSS error: {css_error}, XPath error: {xpath_error}'
                            )
                            return False
                    else:
                        logging.error(f'CSS selector type failed and no XPath available for element {id}')
                        return False
            else:
                logging.warning(f'Invalid CSS selector format for element {id}: {selector}')
                # CSS selector format invalid, use XPath directly
                xpath = element.get('xpath')
                if xpath:
                    try:
                        await self.page.locator(f'xpath={xpath}').fill(text)
                        logging.debug(f"Typed '{text}' into element {id} using XPath: {xpath}")
                    except Exception as xpath_error:
                        logging.error(f'XPath type failed for element {id}: {xpath_error}')
                        return False
                else:
                    logging.error(f'Invalid CSS selector and no XPath available for element {id}')
                    return False

            # Small settle time after filling before reporting success.
            await asyncio.sleep(1)
            return True
        except Exception as e:
            logging.error(f'Failed to type into element {id}: {e}')
            return False
417
+
418
+ @staticmethod
419
+ def _is_valid_css_selector(selector: str) -> bool:
420
+ """Validate if CSS selector format is valid.
421
+
422
+ Args:
423
+ selector: CSS selector string
424
+
425
+ Returns:
426
+ bool: True if selector format is valid, False otherwise
427
+ """
428
+ if not selector or not isinstance(selector, str):
429
+ return False
430
+
431
+ # Basic CSS selector format validation
432
+ # Check for invalid characters or format
433
+ try:
434
+ # Remove whitespace
435
+ selector = selector.strip()
436
+ if not selector:
437
+ return False
438
+
439
+ # Basic CSS selector syntax check
440
+ # Cannot start with a number (unless it's a pseudo-selector)
441
+ if re.match(r'^[0-9]', selector) and not selector.startswith(':'):
442
+ return False
443
+
444
+ # Check basic CSS selector pattern
445
+ # Allow: tag names, class names, IDs, attributes, pseudo-classes, pseudo-elements, combinators, etc.
446
+ css_pattern = r'^[a-zA-Z_\-\[\]().,:#*>+~\s="\'0-9]+$'
447
+ if not re.match(css_pattern, selector):
448
+ return False
449
+
450
+ # Check bracket matching
451
+ if selector.count('[') != selector.count(']'):
452
+ return False
453
+ if selector.count('(') != selector.count(')'):
454
+ return False
455
+
456
+ return True
457
+
458
+ except Exception:
459
+ return False
460
+
461
    async def clear(self, id) -> bool:
        """Clears the text in the specified input element.

        Focuses the element via a coordinate click, then fills it with an empty
        string through its CSS selector, falling back to XPath when the CSS
        selector is invalid or the fill fails (same strategy as ``type``).

        Args:
            id: element id; looked up (as a string) in ``self.page_element_buffer``.

        Returns:
            bool: True if the field was cleared, False on any failure.
        """
        try:
            element_to_clear = self.page_element_buffer.get(str(id))
            if not element_to_clear:
                logging.error(f'Element with id {id} not found in buffer for clear action.')
                return False

            logging.debug(
                f"Attempting to clear element: id={id}, tagName='{element_to_clear.get('tagName')}', innerText='{element_to_clear.get('innerText', '').strip()[:50]}', selector='{element_to_clear.get('selector')}'"
            )

            # First, click the element to ensure it has focus
            if not await self.click(str(id)):
                logging.warning(f'Could not focus element {id} before clearing, but proceeding anyway.')

            # Get the selector for the element
            if 'selector' not in element_to_clear:
                logging.error(f'Element {id} has no selector for clearing.')
                return False

            selector = element_to_clear['selector']

            # Clear input with CSS validation and XPath fallback
            # First validate CSS selector format
            if self._is_valid_css_selector(selector):
                try:
                    # Try using CSS selector
                    await self.page.locator(selector).fill('')
                    logging.debug(f'Cleared input for element {id} using CSS selector: {selector}')
                except Exception as css_error:
                    logging.warning(f'CSS selector clear failed for element {id}: {css_error}')
                    # CSS selector failed, try XPath
                    xpath = element_to_clear.get('xpath')
                    if xpath:
                        try:
                            await self.page.locator(f'xpath={xpath}').fill('')
                            logging.debug(f'Cleared input for element {id} using XPath fallback: {xpath}')
                        except Exception as xpath_error:
                            logging.error(
                                f'Both CSS and XPath clear failed for element {id}. CSS error: {css_error}, XPath error: {xpath_error}'
                            )
                            return False
                    else:
                        logging.error(f'CSS selector clear failed and no XPath available for element {id}')
                        return False
            else:
                logging.warning(f'Invalid CSS selector format for element {id}: {selector}')
                # CSS selector format invalid, use XPath directly
                xpath = element_to_clear.get('xpath')
                if xpath:
                    try:
                        await self.page.locator(f'xpath={xpath}').fill('')
                        logging.debug(f'Cleared input for element {id} using XPath: {xpath}')
                    except Exception as xpath_error:
                        logging.error(f'XPath clear failed for element {id}: {xpath_error}')
                        return False
                else:
                    logging.error(f'Invalid CSS selector and no XPath available for element {id}')
                    return False

            # Short settle time after clearing.
            await asyncio.sleep(0.5)
            return True
        except Exception as e:
            logging.error(f'Failed to clear element {id}: {e}')
            return False
527
+
528
+ async def keyboard_press(self, key) -> bool:
529
+ """Press keyboard key.
530
+
531
+ Args:
532
+ key: key name
533
+
534
+ Returns:
535
+ bool: True if success, False if failed
536
+ """
537
+ await self.page.keyboard.press(key)
538
+ await asyncio.sleep(1)
539
+ return True
540
+
541
+ async def b64_page_screenshot(self, full_page=False, file_path=None, file_name=None, save_to_log=True):
542
+ """Get page screenshot (Base64 encoded)
543
+
544
+ Args:
545
+ full_page: whether to capture the whole page
546
+ file_path: screenshot save path (optional)
547
+ file_name: screenshot file name (optional)
548
+ save_to_log: whether to save to log system (default True)
549
+
550
+ Returns:
551
+ tuple: (screenshot base64 encoded, screenshot file path)
552
+ """
553
+ # get screenshot
554
+ screenshot_bytes = await self.take_screenshot(self.page, full_page=full_page, timeout=30000)
555
+
556
+ # convert to Base64
557
+ screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8')
558
+ base64_data = f'data:image/png;base64,{screenshot_base64}'
559
+ return base64_data
560
+
561
+ async def take_screenshot(
562
+ self,
563
+ page: Page,
564
+ full_page: bool = False,
565
+ file_path: str | None = None,
566
+ timeout: float = 120000,
567
+ ) -> bytes:
568
+ """Get page screenshot (binary)
569
+
570
+ Args:
571
+ page: page object
572
+ full_page: whether to capture the whole page
573
+ file_path: screenshot save path (only used for direct saving, not recommended in test flow)
574
+ timeout: timeout
575
+
576
+ Returns:
577
+ bytes: screenshot binary data
578
+ """
579
+ try:
580
+ try:
581
+ await page.wait_for_load_state(timeout=60000)
582
+ except Exception as e:
583
+ logging.warning(f'wait_for_load_state before screenshot failed: {e}; attempting screenshot anyway')
584
+ logging.debug('Page is fully loaded or skipped wait; taking screenshot')
585
+
586
+ # Directly capture screenshot as binary data
587
+ if file_path:
588
+ screenshot: bytes = await page.screenshot(
589
+ path=file_path,
590
+ full_page=full_page,
591
+ timeout=timeout,
592
+ )
593
+ else:
594
+ screenshot: bytes = await page.screenshot(
595
+ full_page=full_page,
596
+ timeout=timeout,
597
+ )
598
+
599
+ return screenshot
600
+
601
+ except Exception as e:
602
+ logging.warning(f'Page screenshot attempt failed: {e}; trying fallback capture')
603
+ raise
604
+
605
+ async def go_back(self) -> bool:
606
+ """Navigate back to the previous page."""
607
+ try:
608
+ await self.page.go_back()
609
+ logging.debug('Navigated back to the previous page.')
610
+ return True
611
+ except Exception as e:
612
+ logging.error(f'Failed to navigate back: {e}')
613
+ return False
614
+
615
+ async def get_new_page(self):
616
+ try:
617
+ if self.driver:
618
+ self.page = await self.driver.get_new_page()
619
+ else:
620
+ # If no driver, check current context page list
621
+ pages = self.page.context.pages if self.page else []
622
+ if len(pages) > 1:
623
+ self.page = pages[-1]
624
+ return True
625
+ except Exception as e:
626
+ logging.error(f'Failed to get new page: {e}')
627
+ return False
628
+
629
    async def upload_file(self, id, file_path: Union[str, List[str]]) -> bool:
        """File upload function.

        Locates a suitable <input type="file"> on the page (rather than the
        element identified by ``id``) and uploads the given file(s) to it.

        Args:
            id (str): element ID (not used for matching)
            file_path (str or list): file path or path list to upload

        Returns:
            bool: True if success, False if failed
        """
        try:
            # Support single file and multiple files
            if isinstance(file_path, str):
                file_paths = [file_path]
            elif isinstance(file_path, list):
                file_paths = file_path
            else:
                logging.error(f'file_path must be str or list, got {type(file_path)}')
                return False

            # Keep only existing string paths; missing files are logged and skipped.
            valid_file_paths = []
            for fp in file_paths:
                if not fp or not isinstance(fp, str):
                    continue
                if not os.path.exists(fp):
                    logging.error(f'File not found: {fp}')
                    continue
                valid_file_paths.append(fp)

            if not valid_file_paths:
                logging.error('No valid files to upload.')
                return False

            # Get file extension for accept check
            file_extension = os.path.splitext(valid_file_paths[0])[1].lower() if valid_file_paths else ''

            # Find all file input elements and get more detailed selector.
            # NOTE(review): acceptsFile is a substring test of the file
            # extension against the accept attribute, so MIME-style accepts
            # (e.g. "image/*") never match the extension; the code then falls
            # back to the first file input below — confirm this is intended.
            file_inputs = await self.page.evaluate(
                """(fileExt) => {
                return Array.from(document.querySelectorAll('input[type=\"file\"]'))
                    .map(input => {
                        const accept = input.getAttribute('accept') || '';
                        let selector = `input[type=\"file\"]`;

                        if (input.name) {
                            selector += `[name=\"${input.name}\"]`;
                        }

                        if (accept) {
                            selector += `[accept=\"${accept}\"]`;
                        }

                        return {
                            selector: selector,
                            accept: accept,
                            acceptsFile: accept ? accept.toLowerCase().includes(fileExt) : true
                        };
                    });
            }""",
                file_extension,
            )

            if not file_inputs:
                logging.error('No file input elements found')
                return False

            # Find compatible input elements
            logging.debug(f'file_inputs: {file_inputs}')
            compatible_inputs = [input_elem for input_elem in file_inputs if input_elem.get('acceptsFile')]

            # If compatible input elements are found, use the first one, otherwise fallback to the first available
            logging.debug(f'compatible_inputs: {compatible_inputs}')
            selected_input = compatible_inputs[0] if compatible_inputs else file_inputs[0]
            logging.debug(f'selected_input: {selected_input}')

            # Upload files (support batch)
            selector = selected_input.get('selector')
            logging.debug(f'Uploading files {valid_file_paths} to: {selector}')
            await self.page.set_input_files(selector, valid_file_paths)

            # Brief pause so the page can react to the new files.
            await asyncio.sleep(1)
            return True

        except Exception as e:
            logging.error(f'Upload failed: {str(e)}')
            return False
715
+
716
    async def get_dropdown_options(self, id) -> Dict[str, Any]:
        """Get all options of various type selectors.

        supported selector types:
        - native <select> element
        - Ant Design Select (.ant-select)
        - Ant Design Cascader (.ant-cascader)
        - other custom dropdown components

        NOTE: for the Ant Design cases the injected script clicks the control
        to expand its dropdown, so calling this method can change page state.
        The target element is located by its buffered center coordinates via
        document.elementFromPoint, so it must be visible in the viewport.

        Args:
            id: element ID

        Returns:
            Dict: dictionary containing option information, format:
            {
                'success': bool,
                'options': List[Dict] or None,
                'message': str,
                'selector_type': str  # selector type
            }
        """
        element = self.page_element_buffer.get(str(id))
        if not element:
            return {
                'success': False,
                'options': None,
                'message': f'Element with id {id} not found in buffer',
                'selector_type': 'unknown',
            }

        try:
            # use JavaScript to detect selector type and get options
            js_code = """
            (elementData) => {
                // find element by coordinates
                const centerX = elementData.center_x;
                const centerY = elementData.center_y;
                const element = document.elementFromPoint(centerX, centerY);

                if (!element) {
                    return { success: false, message: 'Element not found at coordinates', selector_type: 'unknown' };
                }

                let selectElement = element.closest('select');
                if (selectElement) {
                    const options = Array.from(selectElement.options).map((opt, index) => ({
                        text: opt.text,
                        value: opt.value,
                        index: index,
                        selected: opt.selected
                    }));

                    return {
                        success: true,
                        options: options,
                        selector_type: 'native_select',
                        selectInfo: {
                            id: selectElement.id,
                            name: selectElement.name,
                            multiple: selectElement.multiple,
                            selectedIndex: selectElement.selectedIndex,
                            optionCount: selectElement.options.length
                        }
                    };
                }

                let antSelect = element.closest('.ant-select');
                if (antSelect && !antSelect.classList.contains('ant-cascader')) {
                    // click to expand options
                    const selector = antSelect.querySelector('.ant-select-selector');
                    if (selector) {
                        selector.click();

                        // wait for options to appear
                        return new Promise((resolve) => {
                            setTimeout(() => {
                                const dropdown = document.querySelector('.ant-select-dropdown:not(.ant-select-dropdown-hidden)');
                                if (dropdown) {
                                    const options = Array.from(dropdown.querySelectorAll('.ant-select-item-option')).map((opt, index) => {
                                        const textEl = opt.querySelector('.ant-select-item-option-content');
                                        return {
                                            text: textEl ? textEl.textContent.trim() : opt.textContent.trim(),
                                            value: opt.getAttribute('data-value') || opt.textContent.trim(),
                                            index: index,
                                            selected: opt.classList.contains('ant-select-item-option-selected'),
                                            disabled: opt.classList.contains('ant-select-item-option-disabled')
                                        };
                                    });

                                    resolve({
                                        success: true,
                                        options: options,
                                        selector_type: 'ant_select',
                                        selectInfo: {
                                            multiple: antSelect.classList.contains('ant-select-multiple'),
                                            allowClear: antSelect.classList.contains('ant-select-allow-clear'),
                                            optionCount: options.length
                                        }
                                    });
                                } else {
                                    resolve({
                                        success: false,
                                        message: 'Could not find dropdown options after clicking',
                                        selector_type: 'ant_select'
                                    });
                                }
                            }, 500);
                        });
                    }
                }

                // check if it is Ant Design Cascader
                let antCascader = element.closest('.ant-cascader');
                if (antCascader) {
                    // click to expand options
                    const selector = antCascader.querySelector('.ant-select-selector');
                    if (selector) {
                        selector.click();

                        // wait for cascader options to appear
                        return new Promise((resolve) => {
                            setTimeout(() => {
                                const dropdown = document.querySelector('.ant-cascader-dropdown:not(.ant-cascader-dropdown-hidden)');
                                if (dropdown) {
                                    // get first level options
                                    const firstLevelOptions = Array.from(dropdown.querySelectorAll('.ant-cascader-menu:first-child .ant-cascader-menu-item')).map((opt, index) => {
                                        return {
                                            text: opt.textContent.trim(),
                                            value: opt.getAttribute('data-path-key') || opt.textContent.trim(),
                                            index: index,
                                            selected: opt.classList.contains('ant-cascader-menu-item-active'),
                                            hasChildren: opt.classList.contains('ant-cascader-menu-item-expand'),
                                            level: 0
                                        };
                                    });

                                    resolve({
                                        success: true,
                                        options: firstLevelOptions,
                                        selector_type: 'ant_cascader',
                                        selectInfo: {
                                            multiple: antCascader.classList.contains('ant-select-multiple'),
                                            allowClear: antCascader.classList.contains('ant-select-allow-clear'),
                                            optionCount: firstLevelOptions.length,
                                            isExpanded: true
                                        }
                                    });
                                } else {
                                    resolve({
                                        success: false,
                                        message: 'Could not find cascader dropdown after clicking',
                                        selector_type: 'ant_cascader'
                                    });
                                }
                            }, 500);
                        });
                    }
                }

                // check other possible dropdown components
                let customDropdown = element.closest('[role="combobox"], [role="listbox"], .dropdown, .select');
                if (customDropdown) {
                    // try generic method to get options
                    const options = Array.from(customDropdown.querySelectorAll('option, [role="option"], .option, .item')).map((opt, index) => ({
                        text: opt.textContent.trim(),
                        value: opt.getAttribute('value') || opt.getAttribute('data-value') || opt.textContent.trim(),
                        index: index,
                        selected: opt.hasAttribute('selected') || opt.classList.contains('selected') || opt.getAttribute('aria-selected') === 'true'
                    }));

                    if (options.length > 0) {
                        return {
                            success: true,
                            options: options,
                            selector_type: 'custom_dropdown',
                            selectInfo: {
                                optionCount: options.length
                            }
                        };
                    }
                }

                // if no match, return failure
                return {
                    success: false,
                    message: 'No supported dropdown type found. Element classes: ' + element.className,
                    selector_type: 'unsupported'
                };
            }
            """

            result = await self.page.evaluate(js_code, element)

            if result.get('success'):
                logging.debug(f"Found {len(result['options'])} options in {result.get('selector_type')} dropdown")
                return {
                    'success': True,
                    'options': result['options'],
                    'selector_type': result.get('selector_type'),
                    'selectInfo': result.get('selectInfo'),
                    'message': f"Successfully retrieved {len(result['options'])} options from {result.get('selector_type')}",
                }
            else:
                logging.error(f"Failed to get dropdown options: {result.get('message')}")
                return {
                    'success': False,
                    'options': None,
                    'selector_type': result.get('selector_type', 'unknown'),
                    'message': result.get('message', 'Unknown error'),
                }

        except Exception as e:
            logging.error(f'Error getting dropdown options: {str(e)}')
            return {'success': False, 'options': None, 'selector_type': 'error', 'message': f'Error: {str(e)}'}
930
+
931
+ async def select_dropdown_option(self, dropdown_id, option_text, option_id=None):
932
+ """Priority option_id, otherwise use dropdown_id to expand and
933
+ select."""
934
+ # priority option_id
935
+ if option_id is not None:
936
+ element = self.page_element_buffer.get(str(option_id))
937
+ if element:
938
+ x = element.get('center_x')
939
+ y = element.get('center_y')
940
+ await self.page.mouse.click(x, y)
941
+ logging.debug(f'Clicked option_id {option_id} ({option_text}) directly.')
942
+ return {
943
+ 'success': True,
944
+ 'message': f"Clicked dropdown option '{option_text}' directly.",
945
+ 'selected_value': element.get('innerText'),
946
+ 'selector_type': 'ant_select_option',
947
+ }
948
+ else:
949
+ logging.warning(f'option_id {option_id} not found in buffer, fallback to dropdown_id.')
950
+
951
+ # fallback: use dropdown_id to expand and select
952
+ element = self.page_element_buffer.get(str(dropdown_id))
953
+ if not element:
954
+ return {
955
+ 'success': False,
956
+ 'message': f'dropdown_id {dropdown_id} not found in buffer',
957
+ 'selected_value': None,
958
+ 'selector_type': 'unknown',
959
+ }
960
+
961
+ try:
962
+ # use JavaScript to detect selector type and select option
963
+ js_code = """
964
+ (params) => {
965
+ const elementData = params.elementData;
966
+ const targetText = params.targetText;
967
+
968
+ // find element by coordinates
969
+ const centerX = elementData.center_x;
970
+ const centerY = elementData.center_y;
971
+ const element = document.elementFromPoint(centerX, centerY);
972
+
973
+ if (!element) {
974
+ return { success: false, message: 'Element not found at coordinates', selector_type: 'unknown' };
975
+ }
976
+
977
+ // 1. handle native select element
978
+ let selectElement = element.closest('select');
979
+ if (selectElement) {
980
+ // find matching options
981
+ let targetOption = null;
982
+ for (let i = 0; i < selectElement.options.length; i++) {
983
+ const option = selectElement.options[i];
984
+ if (option.text === targetText || option.text.includes(targetText) || targetText.includes(option.text)) {
985
+ targetOption = option;
986
+ break;
987
+ }
988
+ }
989
+
990
+ if (!targetOption) {
991
+ const availableOptions = Array.from(selectElement.options).map(opt => opt.text);
992
+ return {
993
+ success: false,
994
+ message: `Option "${targetText}" not found in native select. Available: ${availableOptions.join(', ')}`,
995
+ selector_type: 'native_select',
996
+ availableOptions: availableOptions
997
+ };
998
+ }
999
+
1000
+ // select option
1001
+ selectElement.selectedIndex = targetOption.index;
1002
+ targetOption.selected = true;
1003
+
1004
+ // trigger event
1005
+ selectElement.dispatchEvent(new Event('change', { bubbles: true }));
1006
+ selectElement.dispatchEvent(new Event('input', { bubbles: true }));
1007
+
1008
+ return {
1009
+ success: true,
1010
+ message: `Successfully selected option: "${targetOption.text}"`,
1011
+ selectedValue: targetOption.value,
1012
+ selectedText: targetOption.text,
1013
+ selector_type: 'native_select'
1014
+ };
1015
+ }
1016
+
1017
+ // 2. handle Ant Design Select
1018
+ let antSelect = element.closest('.ant-select');
1019
+ if (antSelect && !antSelect.classList.contains('ant-cascader')) {
1020
+ // ensure dropdown is expanded (idempotent)
1021
+ const selector = antSelect.querySelector('.ant-select-selector');
1022
+ if (selector) {
1023
+ const ensureExpanded = () => {
1024
+ const visible = document.querySelector('.ant-select-dropdown:not(.ant-select-dropdown-hidden)');
1025
+ if (visible) return Promise.resolve(visible);
1026
+ selector.click();
1027
+ return new Promise(res => setTimeout(() => {
1028
+ res(document.querySelector('.ant-select-dropdown:not(.ant-select-dropdown-hidden)'));
1029
+ }, 300));
1030
+ };
1031
+
1032
+ return new Promise((resolve) => {
1033
+ ensureExpanded().then((dropdown) => {
1034
+ if (dropdown) {
1035
+ // find matching options
1036
+ const options = Array.from(dropdown.querySelectorAll('.ant-select-item-option'));
1037
+ let targetOption = null;
1038
+
1039
+ for (let option of options) {
1040
+ const textEl = option.querySelector('.ant-select-item-option-content');
1041
+ const optionText = textEl ? textEl.textContent.trim() : option.textContent.trim();
1042
+
1043
+ if (optionText === targetText ||
1044
+ optionText.includes(targetText) ||
1045
+ targetText.includes(optionText)) {
1046
+ targetOption = option;
1047
+ break;
1048
+ }
1049
+ }
1050
+
1051
+ if (!targetOption) {
1052
+ const availableOptions = options.map(opt => {
1053
+ const textEl = opt.querySelector('.ant-select-item-option-content');
1054
+ return textEl ? textEl.textContent.trim() : opt.textContent.trim();
1055
+ });
1056
+ resolve({
1057
+ success: false,
1058
+ message: `Option "${targetText}" not found in ant-select. Available: ${availableOptions.join(', ')}`,
1059
+ selector_type: 'ant_select',
1060
+ availableOptions: availableOptions
1061
+ });
1062
+ return;
1063
+ }
1064
+
1065
+ // click option
1066
+ targetOption.click();
1067
+
1068
+ // trigger event
1069
+ antSelect.dispatchEvent(new Event('change', { bubbles: true }));
1070
+
1071
+ const selectedText = targetOption.querySelector('.ant-select-item-option-content')?.textContent.trim() || targetOption.textContent.trim();
1072
+ const selectedValue = targetOption.getAttribute('data-value') || selectedText;
1073
+
1074
+ resolve({
1075
+ success: true,
1076
+ message: `Successfully selected ant-select option: "${selectedText}"`,
1077
+ selectedValue: selectedValue,
1078
+ selectedText: selectedText,
1079
+ selector_type: 'ant_select'
1080
+ });
1081
+ } else {
1082
+ resolve({
1083
+ success: false,
1084
+ message: 'Could not find ant-select dropdown after clicking',
1085
+ selector_type: 'ant_select'
1086
+ });
1087
+ }
1088
+ });
1089
+ });
1090
+ }
1091
+ }
1092
+
1093
+ // 3. handle Ant Design Cascader
1094
+ let antCascader = element.closest('.ant-cascader');
1095
+ if (antCascader) {
1096
+ // ensure cascader is expanded (idempotent)
1097
+ const selector = antCascader.querySelector('.ant-select-selector');
1098
+ if (selector) {
1099
+ const ensureExpanded = () => {
1100
+ const visible = document.querySelector('.ant-cascader-dropdown:not(.ant-cascader-dropdown-hidden)');
1101
+ if (visible) return Promise.resolve(visible);
1102
+ selector.click();
1103
+ return new Promise(res => setTimeout(() => {
1104
+ res(document.querySelector('.ant-cascader-dropdown:not(.ant-cascader-dropdown-hidden)'));
1105
+ }, 300));
1106
+ };
1107
+
1108
+ return new Promise((resolve) => {
1109
+ ensureExpanded().then((dropdown) => {
1110
+ if (dropdown) {
1111
+ // find matching options in first level
1112
+ const firstLevelOptions = Array.from(dropdown.querySelectorAll('.ant-cascader-menu:first-child .ant-cascader-menu-item'));
1113
+ let targetOption = null;
1114
+
1115
+ for (let option of firstLevelOptions) {
1116
+ const optionText = option.textContent.trim();
1117
+ if (optionText === targetText ||
1118
+ optionText.includes(targetText) ||
1119
+ targetText.includes(optionText)) {
1120
+ targetOption = option;
1121
+ break;
1122
+ }
1123
+ }
1124
+
1125
+ if (!targetOption) {
1126
+ const availableOptions = firstLevelOptions.map(opt => opt.textContent.trim());
1127
+ resolve({
1128
+ success: false,
1129
+ message: `Option "${targetText}" not found in cascader first level. Available: ${availableOptions.join(', ')}`,
1130
+ selector_type: 'ant_cascader',
1131
+ availableOptions: availableOptions
1132
+ });
1133
+ return;
1134
+ }
1135
+
1136
+ // click option
1137
+ targetOption.click();
1138
+
1139
+ // if it is leaf node (no sub options), trigger select event and close dropdown
1140
+ if (!targetOption.classList.contains('ant-cascader-menu-item-expand')) {
1141
+ antCascader.dispatchEvent(new Event('change', { bubbles: true }));
1142
+
1143
+ // close dropdown
1144
+ setTimeout(() => {
1145
+ document.body.click();
1146
+ }, 100);
1147
+ }
1148
+
1149
+ const selectedText = targetOption.textContent.trim();
1150
+ const selectedValue = targetOption.getAttribute('data-path-key') || selectedText;
1151
+
1152
+ resolve({
1153
+ success: true,
1154
+ message: `Successfully selected cascader option: "${selectedText}"`,
1155
+ selectedValue: selectedValue,
1156
+ selectedText: selectedText,
1157
+ selector_type: 'ant_cascader'
1158
+ });
1159
+ } else {
1160
+ resolve({
1161
+ success: false,
1162
+ message: 'Could not find cascader dropdown after clicking',
1163
+ selector_type: 'ant_cascader'
1164
+ });
1165
+ }
1166
+ });
1167
+ });
1168
+ }
1169
+ }
1170
+
1171
+ // 4. handle other custom dropdown components
1172
+ let customDropdown = element.closest('[role="combobox"], [role="listbox"], .dropdown, .select');
1173
+ if (customDropdown) {
1174
+ // try to click to expand
1175
+ customDropdown.click();
1176
+
1177
+ setTimeout(() => {
1178
+ const options = Array.from(document.querySelectorAll('[role="option"], .option, .item'));
1179
+ let targetOption = null;
1180
+
1181
+ for (let option of options) {
1182
+ const optionText = option.textContent.trim();
1183
+ if (optionText === targetText ||
1184
+ optionText.includes(targetText) ||
1185
+ targetText.includes(optionText)) {
1186
+ targetOption = option;
1187
+ break;
1188
+ }
1189
+ }
1190
+
1191
+ if (targetOption) {
1192
+ targetOption.click();
1193
+ customDropdown.dispatchEvent(new Event('change', { bubbles: true }));
1194
+
1195
+ return {
1196
+ success: true,
1197
+ message: `Successfully selected custom dropdown option: "${targetOption.textContent.trim()}"`,
1198
+ selectedValue: targetOption.getAttribute('value') || targetOption.textContent.trim(),
1199
+ selectedText: targetOption.textContent.trim(),
1200
+ selector_type: 'custom_dropdown'
1201
+ };
1202
+ }
1203
+ }, 300);
1204
+ }
1205
+
1206
+ // if no match, return failure
1207
+ return {
1208
+ success: false,
1209
+ message: 'No supported dropdown type found for selection. Element classes: ' + element.className,
1210
+ selector_type: 'unsupported'
1211
+ };
1212
+ }
1213
+ """
1214
+
1215
+ result = await self.page.evaluate(js_code, {'elementData': element, 'targetText': option_text})
1216
+
1217
+ if result.get('success'):
1218
+ logging.debug(f"Successfully selected {result.get('selector_type')} option: {option_text}")
1219
+ return {
1220
+ 'success': True,
1221
+ 'message': result['message'],
1222
+ 'selected_value': result.get('selectedValue'),
1223
+ 'selected_text': result.get('selectedText'),
1224
+ 'selector_type': result.get('selector_type'),
1225
+ }
1226
+ else:
1227
+ logging.error(f"Failed to select dropdown option: {result.get('message')}")
1228
+ return {
1229
+ 'success': False,
1230
+ 'message': result.get('message', 'Unknown error'),
1231
+ 'selected_value': None,
1232
+ 'selector_type': result.get('selector_type', 'unknown'),
1233
+ 'available_options': result.get('availableOptions'),
1234
+ }
1235
+
1236
+ except Exception as e:
1237
+ logging.error(f'Error selecting dropdown option: {str(e)}')
1238
+ return {'success': False, 'message': f'Error: {str(e)}', 'selected_value': None, 'selector_type': 'error'}
1239
+
1240
    async def select_cascade_level(self, id, option_text: str, level: int = 0) -> Dict[str, Any]:
        """Select cascade selector specific level option.

        Locates the on-screen element for *id* from the element buffer, then
        runs in-page JavaScript that drives an Ant Design Cascader: opens the
        dropdown (for level 0), finds the menu for the requested level, and
        clicks the option whose text matches *option_text*.

        Args:
            id: element ID (key into ``self.page_element_buffer``; note this
                shadows the ``id`` builtin).
            option_text: option text to select. Matching is exact or
                substring in either direction.
            level: cascade level (0 for first level, 1 for second level, etc.)

        Returns:
            Dict: operation result with ``success``, ``message``,
            ``selector_type`` and, on success, ``selected_value`` /
            ``selected_text``.
        """
        # Element metadata (center coordinates) captured by an earlier crawl.
        element = self.page_element_buffer.get(str(id))
        if not element:
            return {
                'success': False,
                'message': f'Element with id {id} not found in buffer',
                'selector_type': 'unknown',
            }

        try:
            # use JavaScript to perform cascade selection
            # NOTE(review): the script resolves the element via
            # document.elementFromPoint using buffered coordinates — assumes
            # the page has not scrolled/reflowed since the crawl; confirm.
            js_code = """
            (params) => {
                const elementData = params.elementData;
                const targetText = params.targetText;
                const level = params.level;

                // find element by coordinates
                const centerX = elementData.center_x;
                const centerY = elementData.center_y;
                const element = document.elementFromPoint(centerX, centerY);

                if (!element) {
                    return { success: false, message: 'Element not found at coordinates', selector_type: 'unknown' };
                }

                // check if it is Ant Design Cascader
                let antCascader = element.closest('.ant-cascader');
                if (antCascader) {
                    return new Promise((resolve) => {
                        // if it is first level, need to click to open dropdown
                        if (level === 0) {
                            const selector = antCascader.querySelector('.ant-select-selector');
                            if (selector) {
                                selector.click();
                            }
                        }

                        setTimeout(() => {
                            const dropdown = document.querySelector('.ant-cascader-dropdown:not(.ant-cascader-dropdown-hidden)');
                            if (!dropdown) {
                                resolve({
                                    success: false,
                                    message: `Could not find cascader dropdown for level ${level}`,
                                    selector_type: 'ant_cascader'
                                });
                                return;
                            }

                            // select corresponding menu by level
                            const menus = dropdown.querySelectorAll('.ant-cascader-menu');
                            if (level >= menus.length) {
                                resolve({
                                    success: false,
                                    message: `Level ${level} not available, only ${menus.length} levels found`,
                                    selector_type: 'ant_cascader'
                                });
                                return;
                            }

                            const targetMenu = menus[level];
                            const options = Array.from(targetMenu.querySelectorAll('.ant-cascader-menu-item'));
                            let targetOption = null;

                            // find matching options
                            for (let option of options) {
                                const optionText = option.textContent.trim();
                                if (optionText === targetText ||
                                    optionText.includes(targetText) ||
                                    targetText.includes(optionText)) {
                                    targetOption = option;
                                    break;
                                }
                            }

                            if (!targetOption) {
                                const availableOptions = options.map(opt => opt.textContent.trim());
                                resolve({
                                    success: false,
                                    message: `Option "${targetText}" not found in level ${level}. Available: ${availableOptions.join(', ')}`,
                                    selector_type: 'ant_cascader',
                                    availableOptions: availableOptions
                                });
                                return;
                            }

                            // click option
                            targetOption.click();

                            const selectedText = targetOption.textContent.trim();
                            const selectedValue = targetOption.getAttribute('data-path-key') || selectedText;

                            // if it is last level or no sub options, trigger select event and close dropdown
                            if (!targetOption.classList.contains('ant-cascader-menu-item-expand')) {
                                setTimeout(() => {
                                    antCascader.dispatchEvent(new Event('change', { bubbles: true }));
                                    // close dropdown
                                    document.body.click();
                                }, 100);
                            }

                            resolve({
                                success: true,
                                message: `Successfully selected level ${level} option: "${selectedText}"`,
                                selectedValue: selectedValue,
                                selectedText: selectedText,
                                selector_type: 'ant_cascader',
                                level: level
                            });
                        }, level === 0 ? 500 : 300); // first level needs more time to wait for dropdown to open
                    });
                }

                // handle other types of cascade selectors
                return {
                    success: false,
                    message: 'Only Ant Design Cascader is supported for cascade selection',
                    selector_type: 'unsupported'
                };
            }
            """

            # Playwright awaits the Promise returned by the script, so the
            # setTimeout-based resolution above is observed here.
            result = await self.page.evaluate(
                js_code, {'elementData': element, 'targetText': option_text, 'level': level}
            )

            if result.get('success'):
                logging.debug(f'Successfully selected level {level} option: {option_text}')
                return {
                    'success': True,
                    'message': result['message'],
                    'selected_value': result.get('selectedValue'),
                    'selected_text': result.get('selectedText'),
                    'selector_type': result.get('selector_type'),
                    'level': level,
                }
            else:
                logging.error(f"Failed to select level {level} option: {result.get('message')}")
                return {
                    'success': False,
                    'message': result.get('message', 'Unknown error'),
                    'selector_type': result.get('selector_type', 'unknown'),
                    # Present only when the option text did not match.
                    'available_options': result.get('availableOptions'),
                    'level': level,
                }

        except Exception as e:
            # Any evaluate/protocol failure is reported as a structured result
            # rather than propagated to the caller.
            logging.error(f'Error selecting cascade level {level} option: {str(e)}')
            return {'success': False, 'message': f'Error: {str(e)}', 'selector_type': 'error', 'level': level}
1399
+
1400
+ async def drag(self, source_coords, target_coords):
1401
+ """Execute drag action."""
1402
+
1403
+ source_x = source_coords.get('x')
1404
+ source_y = source_coords.get('y')
1405
+ target_x = target_coords.get('x')
1406
+ target_y = target_coords.get('y')
1407
+
1408
+ try:
1409
+
1410
+ # move to start position
1411
+ await self.page.mouse.move(source_x, source_y)
1412
+ await asyncio.sleep(0.1)
1413
+
1414
+ # press mouse
1415
+ await self.page.mouse.down()
1416
+ await asyncio.sleep(0.1)
1417
+
1418
+ # drag to target position
1419
+ await self.page.mouse.move(target_x, target_y)
1420
+ await asyncio.sleep(0.1)
1421
+
1422
+ # release mouse
1423
+ await self.page.mouse.up()
1424
+ await asyncio.sleep(0.2)
1425
+
1426
+ logging.debug(f'Drag completed from ({source_x}, {source_y}) to ({target_x}, {target_y})')
1427
+ return True
1428
+
1429
+ except Exception as e:
1430
+ logging.error(f'Drag action failed: {str(e)}')
1431
+ return False
webqa_agent/actions/click_handler.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ from typing import Any, Dict, Optional
4
+
5
+ from playwright.async_api import Page
6
+
7
+ from webqa_agent.actions.action_handler import ActionHandler
8
+
9
+
10
class ClickHandler:
    """Enhanced click handler with multiple locating strategies.

    Clicks an element by xpath, CSS selector, or a JS ``el.click()`` fallback,
    captures screenshots of the outcome (including newly opened pages), and
    accumulates console / network / HTTP-response errors observed on the page.
    """

    def __init__(self):
        # Error events collected by the listeners registered in
        # setup_listeners(); cleared via reset_errors().
        self.console_errors = []
        self.network_errors = []
        self.response_errors = []

    async def setup_listeners(self, page: Page):
        """Setup console and network error listeners.

        Registers three Playwright page listeners that append into this
        instance's error lists. Listeners are never removed here, so errors
        accumulate for the lifetime of the page unless reset_errors() is
        called.
        """

        # Console error listener
        async def on_console(msg):
            if msg.type in ["error", "warning"]:
                error_info = {
                    "type": msg.type,
                    "text": msg.text,
                    "location": msg.location,
                    "timestamp": asyncio.get_event_loop().time(),
                }
                self.console_errors.append(error_info)
                logging.debug(f"Console {msg.type}: {msg.text}")

        # Network error listener
        async def on_request_failed(request):
            # Benign failure reasons that should not be reported.
            IGNORE_ERRORS = [
                "net::ERR_ABORTED",
                "net::ERR_CACHE_MISS",
            ]
            # NOTE(review): request.failure is compared directly against the
            # ignore list — assumes it is the failure-text string here;
            # confirm against the Playwright Python API in use.
            if request.failure not in IGNORE_ERRORS:
                error_info = {
                    "url": request.url,
                    "method": request.method,
                    "failure": request.failure,
                }
                self.network_errors.append(error_info)
                logging.debug(f"Network error: {request.url} - {request.failure}")

        # Response error listener (4xx, 5xx)
        async def on_response(response):
            if response.status >= 400:
                error_info = {
                    "url": response.url,
                    "status": response.status,
                    "status_text": response.status_text,
                }
                self.response_errors.append(error_info)
                logging.debug(f"Response error: {response.url} - {response.status}")

        # Attach listeners
        page.on("console", on_console)
        page.on("requestfailed", on_request_failed)
        page.on("response", on_response)

    async def click_and_screenshot(
        self, page: Page, element_info: Dict[str, Any], element_index: int = 0
    ) -> Dict[str, Any]:
        """Click an element and monitor for errors.

        Args:
            page: Playwright page object
            element_info: Element information from clickable_elements_detection
            element_index: Index of the element being tested

        Returns:
            Dictionary containing click result and any errors
        """

        # Clear previous errors
        action_handler = ActionHandler()
        action_handler.page = page

        # Result skeleton returned to the caller.
        # NOTE(review): the console/network/response error lists below are
        # initialized but never populated in this method (collection happens
        # on the handler instance via setup_listeners) — confirm intended.
        click_result = {
            "element": element_info,
            "success": False,
            "error": None,
            "console_errors": [],
            "network_errors": [],
            "response_errors": [],
            "screenshot_before": None,
            "screenshot_after": None,
            "new_page_screenshot": None,
            "click_method": None,
            "click_coordinates": None,
            "has_new_page": False,
        }

        selector = element_info.get("selector")
        xpath = element_info.get("xpath")
        click_success = False

        logging.debug(f"Clicking element: {element_info}")

        # Watch the browser context for a new page (e.g. target=_blank links)
        # opened as a side effect of the click.
        context = page.context
        new_page = None

        def handle_new_page(page_obj):
            nonlocal new_page
            new_page = page_obj
            logging.debug(f"New page detected: {page_obj.url}")

        context.on("page", handle_new_page)

        click_success = await self._perform_click(page, selector, xpath, click_result)

        if click_success:
            click_result["success"] = True
            # Give the page time to react (navigation, dialogs, new tabs).
            await asyncio.sleep(2)
            if new_page:
                click_result["has_new_page"] = True
                try:
                    await new_page.wait_for_load_state("networkidle", timeout=30000)

                    # Screenshot the newly opened page with its own handler.
                    new_page_action_handler = ActionHandler()
                    new_page_action_handler.page = new_page
                    screenshot_b64 = await new_page_action_handler.b64_page_screenshot(
                        file_name=f"element_{element_index}_new_page"
                    )
                    click_result["new_page_screenshot"] = screenshot_b64
                    logging.debug("New page screenshot saved")

                except Exception as e:
                    click_result["error"] = f"Failed to handle new page: {e}"
                    logging.warning(f"Failed to handle new page: {e}")

                await page.wait_for_load_state("networkidle", timeout=30000)
            else:
                # No new page: capture the original page after the click.
                screenshot_b64 = await action_handler.b64_page_screenshot(
                    file_name=f"element_{element_index}_after_click"
                )
                click_result["screenshot_after"] = screenshot_b64
                logging.debug("After click screenshot saved")

        else:
            click_result["error"] = f"Failed to click element with all strategies. Element: '{element_info}'"
            logging.warning(f"Failed to click element: '{element_info}'")

        # Always detach the page listener and dismiss any popup the click
        # may have opened, so the next element test starts clean.
        context.remove_listener("page", handle_new_page)
        await self._close_popups(page)

        return click_result

    async def _perform_click(
        self, page: Page, selector: Optional[str], xpath: Optional[str], click_result: Dict
    ) -> bool:
        """Try to click via xpath, then CSS selector, then JS ``el.click()``.

        Records the winning strategy in ``click_result['click_method']`` and
        the last failure message in ``click_result['error']``. Returns True
        on the first strategy that succeeds, False if all fail.
        """
        click_timeout = 10000

        # Strategy 1: xpath-based Playwright click.
        if xpath:
            locator_str = f"xpath={xpath}"
            try:
                await self._scroll_into_view_safely(page, locator_str)
                await page.click(locator_str, timeout=click_timeout)
                click_result["click_method"] = locator_str
                logging.debug(f"Successfully clicked using xpath: {xpath}")
                return True
            except Exception as e:
                logging.debug(f"XPath click failed: {e}")
                click_result["error"] = str(e)

        # Strategy 2: CSS-selector-based Playwright click.
        if selector:
            try:
                await self._scroll_into_view_safely(page, selector)
                await page.click(selector, timeout=click_timeout)
                click_result["click_method"] = selector
                logging.debug(f"Successfully clicked using selector: {selector}")
                return True
            except Exception as e:
                logging.debug(f"Selector click failed: {e}")
                click_result["error"] = str(e)

        # Strategy 3: dispatch a DOM click from JS — bypasses Playwright's
        # actionability checks (visibility, overlap).
        try:
            element_handle = None
            if selector:
                try:
                    element_handle = await page.query_selector(selector)
                except Exception as e:
                    logging.debug(f"query_selector failed for selector: {e}")

            if not element_handle and xpath:
                try:
                    element_handle = await page.query_selector(f"xpath={xpath}")
                except Exception as e:
                    logging.debug(f"query_selector failed for xpath: {e}")

            if element_handle:
                await page.evaluate("el => el.click()", element_handle)
                click_result["click_method"] = f"js_evaluate_click:{selector or xpath}"
                logging.debug("Successfully clicked using JS evaluate")
                return True
            else:
                click_result["error"] = "No element handle found for JS click"

        except Exception as e:
            logging.debug(f"JS click failed: {e}")
            click_result["error"] = f"All click strategies failed. Last error: {e}"

        return False

    @staticmethod
    async def _scroll_into_view_safely(page: Page, locator: str) -> None:
        """Best-effort scroll of *locator* into view; failures are logged only."""
        try:
            await page.locator(locator).scroll_into_view_if_needed(timeout=3000)
        except Exception as e:
            logging.debug(f"scroll_into_view_if_needed failed for {locator}: {e}")

    async def _close_popups(self, page: Page) -> None:
        """Detect and dismiss a modal/popup left open by the last click.

        Tries a list of common close-button selectors first, then falls back
        to pressing Escape. All failures are swallowed (best-effort cleanup).
        """
        try:
            popup_detected = await self._detect_popup(page)

            if not popup_detected:
                logging.debug("No popup detected, skipping close operation")
                return

            logging.debug("Popup detected, attempting to close...")

            # Close-control selectors for common modal libraries
            # (Bootstrap, FontAwesome icons, aria/title hints, CJK label).
            close_selectors = [
                '[data-dismiss="modal"]',
                '[data-bs-dismiss="modal"]',
                ".modal-close",
                ".close",
                ".btn-close",
                ".fa-times",
                ".fa-close",
                ".icon-close",
                ".icon-x",
                '[aria-label*="close"]',
                '[aria-label*="Close"]',
                '[title*="close"]',
                '[title*="Close"]',
                'button:has-text("×")',
                'button:has-text("✕")',
                'button:has-text("Close")',
                'button:has-text("关闭")',
                ".modal-backdrop",
                ".overlay",
            ]

            popup_closed = False
            for selector in close_selectors:
                try:
                    element = await page.query_selector(selector)
                    if element:
                        is_visible = await element.is_visible()
                        if is_visible:
                            await element.click(timeout=2000)
                            logging.debug(f"Closed popup using selector: {selector}")
                            popup_closed = True
                            await asyncio.sleep(0.3)  # Wait for close animation
                            break
                except Exception:
                    continue

            if not popup_closed:
                # Last resort: many dialogs close on Escape.
                try:
                    await page.keyboard.press("Escape")
                    logging.debug("Attempted to close popup with ESC key")
                    await asyncio.sleep(0.3)
                except Exception:
                    pass

        except Exception as e:
            logging.debug(f"Popup close attempt failed: {e}")

    async def _detect_popup(self, page: Page) -> bool:
        """Return True when a visible modal/dialog/backdrop is on the page.

        A positive match requires visibility and (for dialog selectors) a
        bounding box larger than 100x100 px to skip tiny decorative elements.
        """
        try:
            # Selectors for common dialog/modal implementations.
            # NOTE(review): '[style*="z-index"]' matches any inline z-index —
            # this is very broad and may flag non-popup elements; confirm.
            popup_selectors = [
                ".modal.show",
                ".modal.in",
                '.modal[style*="display: block"]',
                ".dialog",
                ".popup",
                ".overlay.show",
                '.overlay[style*="display: block"]',
                '[role="dialog"]',
                '[role="alertdialog"]',
                ".fancybox-overlay",
                ".ui-dialog",
                ".sweet-alert",
                ".swal-overlay",
                '[style*="z-index"]',
            ]

            for selector in popup_selectors:
                try:
                    element = await page.query_selector(selector)
                    if element:
                        is_visible = await element.is_visible()
                        if is_visible:
                            bbox = await element.bounding_box()
                            if bbox and bbox["width"] > 100 and bbox["height"] > 100:
                                logging.debug(f"Popup detected with selector: {selector}")
                                return True
                except Exception:
                    continue

            # A visible backdrop alone also counts as an open popup.
            backdrop_selectors = [".modal-backdrop", ".overlay", '[class*="backdrop"]']

            for selector in backdrop_selectors:
                try:
                    element = await page.query_selector(selector)
                    if element:
                        is_visible = await element.is_visible()
                        if is_visible:
                            logging.debug(f"Backdrop detected with selector: {selector}")
                            return True
                except Exception:
                    continue

            return False

        except Exception as e:
            logging.debug(f"Popup detection failed: {e}")
            return False

    def get_error_summary(self) -> Dict[str, Any]:
        """Get a summary of all errors collected."""
        return {
            "total_console_errors": len(self.console_errors),
            "total_network_errors": len(self.network_errors),
            "total_response_errors": len(self.response_errors),
            "console_errors": self.console_errors,
            "network_errors": self.network_errors,
            "response_errors": self.response_errors,
        }

    def reset_errors(self):
        """Reset all error collections."""
        self.console_errors.clear()
        self.network_errors.clear()
        self.response_errors.clear()
webqa_agent/actions/scroll_handler.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import time
4
+
5
+ from playwright.async_api import Page
6
+
7
+ from webqa_agent.actions.action_handler import ActionHandler
8
+
9
+
10
class ScrollHandler:
    """Scroll a page (or its largest scrollable container) while capturing
    base64 screenshots of each viewport via ActionHandler."""

    def __init__(self, page: Page):
        self.page = page
        # NOTE(review): id_counter is initialized but not used in this class
        # as shown here — possibly reserved for callers; confirm.
        self.id_counter = 1

        # Dedicated ActionHandler bound to this page for screenshots.
        self._action_handler = ActionHandler()
        self._action_handler.page = page

    async def detect_scrollable_containers(self):
        """Return metadata for visible scrollable containers on the page.

        A container qualifies when its content overflows its client box AND
        its computed overflow style allows scrolling. Results are sorted by
        on-screen area, largest first. Returns [] on evaluation failure.
        """
        scrollable_containers_script = """
        (function() {
            function findScrollableContainers() {
                const elements = document.querySelectorAll('*');
                const scrollableContainers = [];

                for (let element of elements) {
                    if (element === document.body || element === document.documentElement) {
                        continue;
                    }

                    const style = window.getComputedStyle(element);
                    const hasScrollableContent = element.scrollHeight > element.clientHeight ||
                        element.scrollWidth > element.clientWidth;
                    const hasScrollableStyle = style.overflow === 'auto' ||
                        style.overflow === 'scroll' ||
                        style.overflowY === 'auto' ||
                        style.overflowY === 'scroll' ||
                        style.overflowX === 'auto' ||
                        style.overflowX === 'scroll';

                    if (hasScrollableContent && hasScrollableStyle) {
                        const rect = element.getBoundingClientRect();
                        if (rect.width > 0 && rect.height > 0) {
                            scrollableContainers.push({
                                tagName: element.tagName,
                                className: element.className,
                                id: element.id,
                                scrollHeight: element.scrollHeight,
                                clientHeight: element.clientHeight,
                                scrollWidth: element.scrollWidth,
                                clientWidth: element.clientWidth,
                                rect: {
                                    top: rect.top,
                                    left: rect.left,
                                    width: rect.width,
                                    height: rect.height
                                }
                            });
                        }
                    }
                }

                return scrollableContainers.sort((a, b) =>
                    (b.rect.width * b.rect.height) - (a.rect.width * a.rect.height)
                );
            }

            return findScrollableContainers();
        })()
        """

        try:
            containers = await self.page.evaluate(scrollable_containers_script)
            logging.debug(f"Found {len(containers)} scrollable containers")
            return containers
        except Exception as e:
            logging.error(f"Error detecting scrollable containers: {e}")
            return []

    async def can_global_scroll(self):
        """Report whether the document itself can scroll vertically.

        Returns a dict with ``canScroll``, ``documentHeight``,
        ``windowHeight``, ``currentScrollY`` and ``maxScrollY``; on failure
        returns the same shape with zeros and ``canScroll`` False.
        """
        can_scroll_script = """
        (function() {
            function canGlobalScroll() {
                const documentHeight = Math.max(
                    document.body.scrollHeight,
                    document.body.offsetHeight,
                    document.documentElement.clientHeight,
                    document.documentElement.scrollHeight,
                    document.documentElement.offsetHeight
                );
                const windowHeight = window.innerHeight;
                const currentScrollY = window.scrollY;

                return {
                    canScroll: documentHeight > windowHeight,
                    documentHeight: documentHeight,
                    windowHeight: windowHeight,
                    currentScrollY: currentScrollY,
                    maxScrollY: documentHeight - windowHeight
                };
            }

            return canGlobalScroll();
        })()
        """

        try:
            scroll_info = await self.page.evaluate(can_scroll_script)
            logging.debug(f"Global scroll info: {scroll_info}")
            return scroll_info
        except Exception as e:
            logging.error(f"Error checking global scroll capability: {e}")
            return {"canScroll": False, "documentHeight": 0, "windowHeight": 0, "currentScrollY": 0, "maxScrollY": 0}

    async def scroll_global(self, max_scrolls: int = 10, capture_screenshots: bool = True, page_identifier: str = ""):
        """Scroll the window viewport-by-viewport, screenshotting each stop.

        Stops when the bottom of the document is reached or *max_scrolls*
        scrolls have been performed. Returns the list of base64 screenshots
        (including the initial, pre-scroll viewport).
        """
        logging.debug("Executing global page scrolling")

        viewport_height = await self.page.evaluate("window.innerHeight")
        screenshot_image_list = []

        async def capture_viewport(screenshot_counter=0):
            # Screenshot the current viewport; no-op when capture is disabled.
            if capture_screenshots:
                timestamp = time.strftime("%Y%m%d%H%M%S")
                processed_filename = f"{timestamp}_{page_identifier}_global_viewport_{screenshot_counter}"

                screenshot_base64 = await self._action_handler.b64_page_screenshot(file_name=processed_filename)

                if screenshot_base64:
                    screenshot_image_list.append(screenshot_base64)

        scroll_count = 0
        await capture_viewport(scroll_count)

        while scroll_count < max_scrolls:
            current_scroll_y = await self.page.evaluate("window.scrollY")
            document_height = await self.page.evaluate("document.documentElement.scrollHeight")

            if current_scroll_y + viewport_height >= document_height:
                logging.debug("Reached bottom of the page.")
                break

            # Advance by one full viewport, then let lazy content load.
            await self.page.evaluate(f"window.scrollBy(0, {viewport_height})")
            await asyncio.sleep(2)
            scroll_count += 1
            logging.info(f"Global scrolling down... count: {scroll_count}")
            await capture_viewport(scroll_count)

        return screenshot_image_list

    async def scroll_container(
        self,
        container_selector: str,
        max_scrolls: int = 10,
        capture_screenshots: bool = True,
        page_identifier: str = "",
    ):
        """Scroll one container element page-by-page, screenshotting each stop.

        The selector is interpolated into in-page JS, so it is first passed
        through _escape_selector to avoid breaking the script. Returns the
        list of base64 screenshots (possibly empty if the container is
        missing or evaluation fails).
        """
        logging.debug(f"Executing container scrolling for: {container_selector}")

        safe_selector = self._escape_selector(container_selector)
        if safe_selector != container_selector:
            logging.warning(f"Selector escaped from '{container_selector}' to '{safe_selector}'")

        screenshot_image_list = []

        async def capture_viewport(screenshot_counter=0):
            # Screenshot the current viewport; no-op when capture is disabled.
            if capture_screenshots:
                timestamp = time.strftime("%Y%m%d%H%M%S")
                processed_filename = f"{timestamp}_{page_identifier}_container_viewport_{screenshot_counter}"

                screenshot_base64 = await self._action_handler.b64_page_screenshot(file_name=processed_filename)

                if screenshot_base64:
                    screenshot_image_list.append(screenshot_base64)

        try:
            container_exists = await self.page.evaluate(
                f"""
                (function() {{
                    try {{
                        return !!document.querySelector('{safe_selector}');
                    }} catch(e) {{
                        console.error('Selector error:', e);
                        return false;
                    }}
                }})()
                """
            )
        except Exception as e:
            logging.error(f"Error checking container existence: {e}")
            return screenshot_image_list

        if not container_exists:
            logging.error(f"Container with selector '{safe_selector}' not found")
            return screenshot_image_list

        scroll_count = 0
        await capture_viewport(scroll_count)

        while scroll_count < max_scrolls:

            # Re-read scroll metrics each iteration; content may grow lazily.
            try:
                scroll_info = await self.page.evaluate(
                    f"""
                    (function() {{
                        try {{
                            const container = document.querySelector('{safe_selector}');
                            if (!container) return null;

                            return {{
                                scrollTop: container.scrollTop,
                                scrollHeight: container.scrollHeight,
                                clientHeight: container.clientHeight,
                                canScroll: container.scrollHeight > container.clientHeight
                            }};
                        }} catch(e) {{
                            console.error('Scroll info error:', e);
                            return null;
                        }}
                    }})()
                    """
                )
            except Exception as e:
                logging.error(f"Error getting scroll info: {e}")
                break

            if not scroll_info or not scroll_info["canScroll"]:
                logging.debug("Container cannot scroll or reached bottom")
                break

            if scroll_info["scrollTop"] + scroll_info["clientHeight"] >= scroll_info["scrollHeight"]:
                logging.debug("Reached bottom of the container")
                break

            # scroll container
            scroll_amount = scroll_info["clientHeight"]
            try:
                await self.page.evaluate(
                    f"""
                    (function() {{
                        try {{
                            const container = document.querySelector('{safe_selector}');
                            if (container) {{
                                container.scrollBy(0, {scroll_amount});
                            }}
                        }} catch(e) {{
                            console.error('Scroll error:', e);
                        }}
                    }})()
                    """
                )
            except Exception as e:
                logging.error(f"Error scrolling container: {e}")
                break

            await asyncio.sleep(2)
            scroll_count += 1
            logging.info(f"Container scrolling down... count: {scroll_count}")
            await capture_viewport(scroll_count)

        return screenshot_image_list

    def _safe_selector(self, element_info):
        """Build a simple CSS selector from container metadata.

        Preference order: ``#id`` (when the id has no unsafe characters),
        a single safe class name, then the bare tag name as a fallback.
        """
        if element_info.get("id") and element_info["id"].strip():
            element_id = element_info["id"].strip()
            if element_id and not any(c in element_id for c in [" ", '"', "'", "\\", "/"]):
                return f"#{element_id}"

        if element_info.get("className") and element_info["className"].strip():
            class_names = element_info["className"].strip().split()
            for class_name in class_names:
                if class_name and all(c.isalnum() or c in ["-", "_"] for c in class_name):
                    return f".{class_name}"

        tag_name = element_info.get("tagName", "div").lower()
        return tag_name

    def _escape_selector(self, selector):
        """Guard against selectors that would break the inline JS strings.

        NOTE(review): a selector containing quotes/backslashes/slashes is
        replaced wholesale with 'div' — that targets a different element
        entirely rather than escaping; confirm this fallback is intended.
        """
        if any(c in selector for c in ['"', "'", "\\", "/"]):
            return "div"
        return selector

    async def scroll_and_crawl(
        self,
        scroll: bool = True,
        max_scrolls: int = 10,
        capture_screenshots: bool = True,
        page_identifier: str = "",
        prefer_container: bool = True,
    ):
        """Capture the page via the best available scrolling strategy.

        Strategy: global window scrolling when the document can scroll;
        otherwise the largest detected scrollable container (falling back to
        the next containers if the first yields no extra screenshots);
        otherwise a single static screenshot. Always returns a list of
        base64 screenshots — on error at least one fallback capture is
        attempted.

        NOTE(review): prefer_container is accepted but not consulted in the
        body shown here — confirm whether it should gate the container path.
        """

        screenshot_image_list = []

        # if not scroll, exit after initial capture
        if not scroll:
            logging.debug("Scrolling disabled, exiting after initial capture.")
            timestamp = time.strftime("%Y%m%d%H%M%S")
            processed_filename = f"{timestamp}_{page_identifier}_initial"
            screenshot_base64 = await self._action_handler.b64_page_screenshot(file_name=processed_filename)
            if screenshot_base64:
                screenshot_image_list.append(screenshot_base64)
            return screenshot_image_list

        try:
            # check global scroll ability
            global_scroll_info = await self.can_global_scroll()

            if global_scroll_info["canScroll"]:
                logging.debug("Global scrolling is possible, using global scroll")
                screenshot_image_list = await self.scroll_global(max_scrolls, capture_screenshots, page_identifier)
            else:
                logging.debug("Global scrolling not possible, checking for scrollable containers")

                # detect scrollable containers
                containers = await self.detect_scrollable_containers()

                if containers:
                    # select the largest container for scrolling
                    main_container = containers[0]
                    logging.debug(
                        f"Using main container: {main_container['tagName']} (class: {main_container.get('className', 'N/A')})"
                    )

                    # build safe selector
                    selector = self._safe_selector(main_container)
                    logging.debug(f"Using selector: {selector}")

                    screenshot_image_list = await self.scroll_container(
                        selector, max_scrolls, capture_screenshots, page_identifier
                    )

                    # if first container scrolling failed, try other containers
                    if len(screenshot_image_list) <= 1 and len(containers) > 1:
                        logging.debug("Main container scrolling failed, trying other containers")
                        for i, container in enumerate(containers[1:], 1):
                            logging.debug(
                                f"Trying container {i+1}: {container['tagName']} (class: {container.get('className', 'N/A')})"
                            )

                            selector = self._safe_selector(container)
                            logging.debug(f"Using selector: {selector}")

                            container_screenshots = await self.scroll_container(
                                selector, max_scrolls, capture_screenshots, page_identifier
                            )
                            if len(container_screenshots) > 1:
                                screenshot_image_list = container_screenshots
                                break
                else:
                    logging.debug("No scrollable containers found, taking single screenshot")
                    timestamp = time.strftime("%Y%m%d%H%M%S")
                    processed_filename = f"{timestamp}_{page_identifier}_no_scroll"
                    screenshot_base64 = await self._action_handler.b64_page_screenshot(file_name=processed_filename)
                    if screenshot_base64:
                        screenshot_image_list.append(screenshot_base64)

        except Exception as e:
            logging.error(f"Error in smart scroll: {e}")
            # if error, at least take one screenshot
            timestamp = time.strftime("%Y%m%d%H%M%S")
            processed_filename = f"{timestamp}_{page_identifier}_error_fallback"
            screenshot_base64 = await self._action_handler.b64_page_screenshot(file_name=processed_filename)
            if screenshot_base64:
                screenshot_image_list.append(screenshot_base64)

        return screenshot_image_list
webqa_agent/browser/check.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from datetime import datetime
3
+
4
+ from playwright.async_api import Page
5
+
6
+
7
class NetworkCheck:
    """Records page network activity: requests, responses, failures and SSE streams.

    Handler callables are stored on the instance so ``remove_listeners`` can
    detach exactly the same objects that were registered via ``page.on``.
    All collected data is exposed through :meth:`get_messages`.
    """

    def __init__(self, page: Page):
        self.page = page
        # Single container returned to callers: failed/responses/requests buckets.
        self.network_messages = {"failed_requests": [], "responses": [], "requests": []}
        self._response_callback = self._handle_response()
        self._request_callback = self._handle_request()
        self._requestfinished_callback = self._handle_request_finished()
        self._setup_listeners()

    def _setup_listeners(self):
        # 1. listen to request
        self.page.on("request", self._request_callback)
        # 2. listen to response
        self.page.on("response", self._response_callback)
        # 3. listen to request finished
        self.page.on("requestfinished", self._requestfinished_callback)
        # 4. listen to request failures. Without this hook _on_request_failed was
        #    never invoked, so "failed_requests" stayed empty forever.
        self.page.on("requestfailed", self._on_request_failed)

    def _handle_request(self):
        """Build the 'request' listener: one tracking record per request."""

        async def request_callback(request):
            request_data = {
                "url": request.url,
                "method": request.method,
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
                "has_response": False,
                "completed": False,
                "failed": False,
                "is_sse": False,
                "sse_messages": [],  # list for storing SSE messages
            }
            self.network_messages["requests"].append(request_data)

        return request_callback

    def _handle_response(self):
        """Build the 'response' listener: captures status, headers and body."""

        async def response_callback(response):
            response_url = response.url
            try:
                # Match the response to the first tracked request with the same URL.
                current_request = None
                for request in self.network_messages["requests"]:
                    if request["url"] == response_url:
                        request["has_response"] = True
                        current_request = request
                        break

                if not current_request:
                    return

                # Get response headers (may fail for aborted navigations).
                try:
                    headers = await response.all_headers()
                    content_type = headers.get("content-type", "")
                except Exception:
                    # logging.warning(f"Unable to get headers for {response_url}: {str(e)}")
                    content_type = ""
                    headers = {}

                # Create response data structure
                response_data = {
                    "url": response_url,
                    "status": response.status,
                    "method": response.request.method,
                    "content_type": content_type,
                    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
                    "headers": headers,  # keep response header information
                    "sse_messages": [],
                }

                # HTTP error: record the status and skip body parsing.
                if response.status >= 400:
                    response_data["error"] = f"HTTP {response.status}"
                    self.network_messages["responses"].append(response_data)
                    return

                if "text/event-stream" in content_type:
                    # SSE bodies are only complete at "requestfinished"; mark
                    # the stream pending and fill sse_messages later.
                    current_request["is_sse"] = True
                    response_data["is_sse"] = True

                    try:
                        response_data["sse_pending"] = True
                    except Exception as e:
                        response_data["error"] = str(e)

                else:
                    try:
                        if any(
                            bin_type in content_type.lower()
                            for bin_type in [
                                "image/",
                                "audio/",
                                "video/",
                                "application/pdf",
                                "application/octet-stream",
                                "font/",
                                "application/x-font",
                            ]
                        ):
                            # Binary payloads: store a placeholder plus byte count.
                            response_data["body"] = f"<{content_type} binary data>"
                            response_data["size"] = len(await response.body())

                        elif "application/json" in content_type:
                            try:
                                body = await response.json()
                                response_data["body"] = body
                            except Exception as e:
                                response_data["error"] = f"JSON parse error: {str(e)}"

                        elif any(
                            text_type in content_type.lower()
                            for text_type in [
                                "text/",
                                "application/javascript",
                                "application/xml",
                                "application/x-www-form-urlencoded",
                            ]
                        ):
                            try:
                                text_body = await response.text()
                                response_data["body"] = text_body
                            except Exception as e:
                                response_data["error"] = f"Text decode error: {str(e)}"

                        else:
                            # Unknown content type: placeholder plus byte count.
                            response_data["body"] = f"<{content_type} data>"
                            response_data["size"] = len(await response.body())

                    except Exception as e:
                        response_data["error"] = str(e)

                self.network_messages["responses"].append(response_data)

            except Exception:
                # Listener must never propagate; a broken response is dropped.
                pass

        return response_callback

    def _parse_sse_chunk(self, chunk):
        """Parse an SSE data chunk into a list of {'data': ...} messages.

        Blank lines delimit messages; consecutive ``data:`` lines within one
        message are merged (JSON values into a list, text with newlines).
        """
        messages = []
        current_message = {}

        for line in chunk.split("\n"):
            line = line.strip()
            if not line:
                # Message boundary: flush the accumulated message, if any.
                if current_message:
                    messages.append(current_message)
                    current_message = {}
                continue

            if line.startswith("data:"):
                data = line[5:].strip()
                try:
                    # try to parse JSON data
                    json_data = json.loads(data)
                    if "data" not in current_message:
                        current_message["data"] = json_data
                    else:
                        # if there is data, append new data to existing data
                        if isinstance(current_message["data"], list):
                            current_message["data"].append(json_data)
                        else:
                            current_message["data"] = [current_message["data"], json_data]
                except json.JSONDecodeError:
                    # Not JSON: keep raw text, concatenating continuations.
                    if "data" not in current_message:
                        current_message["data"] = data
                    else:
                        current_message["data"] += "\n" + data
        if current_message:
            messages.append(current_message)

        return messages

    def _handle_request_finished(self):
        """Build the 'requestfinished' listener: marks completion, drains SSE bodies."""

        async def request_finished_callback(request):
            try:
                response = await request.response()
                if not response:
                    # logging.warning(f"No response object for request: {request.url}")
                    return
                # logging.debug(f"Response object for request: {request.url}")
                for req in self.network_messages["requests"]:
                    if req["url"] == request.url:
                        req["completed"] = True

                        if req.get("is_sse"):
                            try:
                                body = await response.body()
                                text = body.decode("utf-8", errors="replace")

                                # handle SSE messages
                                messages = []

                                # process SSE data by line
                                for line in text.split("\n"):
                                    if not line:
                                        continue

                                    if not line.startswith("data:"):
                                        continue

                                    # extract data content
                                    sse_data = line[5:].strip()  # remove 'data:' prefix
                                    if not sse_data:
                                        continue

                                    try:
                                        # parse JSON data
                                        json_data = json.loads(sse_data)
                                        messages.append(
                                            {
                                                "data": json_data,
                                            }
                                        )
                                    except json.JSONDecodeError:
                                        # if not JSON, store original text
                                        messages.append(
                                            {
                                                "data": sse_data,
                                            }
                                        )

                                req["sse_messages"] = messages

                                # Mirror the parsed stream onto the matching response record.
                                for resp in self.network_messages["responses"]:
                                    if resp["url"] == request.url:
                                        resp["sse_messages"] = messages
                                        resp["sse_completed"] = True
                                        break

                            except Exception:
                                pass
                        break

            except Exception:
                pass

        return request_finished_callback

    def get_messages(self):
        """Return the collected network data (requests/responses/failed_requests)."""
        return self.network_messages

    def _on_request_failed(self, request):
        """Synchronous 'requestfailed' handler: flag the request and log the failure."""
        # find and update request status
        for req in self.network_messages["requests"]:
            if req["url"] == request.url:
                req["failed"] = True
                break

        error_data = {"url": request.url, "error": request.failure}
        self.network_messages["failed_requests"].append(error_data)

    def remove_listeners(self):
        """Detach every handler registered in _setup_listeners."""
        # Prefer Playwright's off() which understands internal wrapper mapping
        listeners = [
            ("request", self._request_callback),
            ("response", self._response_callback),
            ("requestfinished", self._requestfinished_callback),
            ("requestfailed", self._on_request_failed),
        ]
        for event_name, handler in listeners:
            try:
                if hasattr(self.page, "off"):
                    self.page.off(event_name, handler)
                else:
                    # Fallback for environments exposing remove_listener
                    self.page.remove_listener(event_name, handler)
            except Exception:
                # Silently ignore if already removed or not found
                pass
273
+
274
+
275
class ConsoleCheck:
    """Captures console error output emitted by a page.

    Only messages of type 'error' are retained; each entry stores the message
    text and, when available, its source location.
    """

    def __init__(self, page):
        self.page = page
        self.console_messages = []
        self._setup_listeners()

    def _setup_listeners(self):
        # Subscribe to every console event; filtering happens in the handler.
        self.page.on("console", self._handle_console)

    def _handle_console(self, msg):
        # Ignore anything that is not an error-level console message.
        if msg.type != "error":
            return
        self.console_messages.append(
            {"msg": msg.text, "location": getattr(msg, "location", None)}
        )

    def get_messages(self):
        """Return the list of captured console errors."""
        return self.console_messages

    def remove_listeners(self):
        """Detach the console handler, ignoring any detach failure."""
        try:
            detach = self.page.off if hasattr(self.page, "off") else self.page.remove_listener
            detach("console", self._handle_console)
        except Exception:
            pass
webqa_agent/browser/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Browser engines the test runner may target.
SUPPORTED_BROWSERS = ["chromium", "firefox", "webkit"]
# Permitted values for the headless launch flag.
HEADLESS = [True, False]
# SUPPORTED_BROWSERS = ["firefox"]

# Viewport sizes exercised when testing across resolutions;
# commented entries are available but disabled by default.
SUPPORTED_RESOLUTIONS = [
    {"width": 1366, "height": 768},
    # {"width": 1440, "height": 900},
    {"width": 1920, "height": 1080},
    # {"width": 2560, "height": 1440},
]

# Fallback browser configuration merged under any caller-supplied overrides.
DEFAULT_CONFIG = {
    "browser_type": "chromium",
    "viewport": {"width": 1280, "height": 720},
    "headless": True,
    "language": "en-US",
}
webqa_agent/browser/driver.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+
4
+ from playwright.async_api import async_playwright
5
+
6
+
7
class Driver:
    """Factory and wrapper around a Playwright Chromium browser, context and page."""

    # Lock used to ensure thread-safety when multiple coroutines create Driver instances concurrently
    __lock = asyncio.Lock()

    @staticmethod
    async def getInstance(browser_config, *args, **kwargs):
        """Create and return a fully-initialized Driver.

        Despite the historical name, this is NOT a singleton: every call
        produces an independent browser so multiple browsers can run in
        parallel. The public API is kept unchanged for existing call-sites.

        Args:
            browser_config (dict): Browser configuration options
                (headless, viewport, language, ...).

        Returns:
            Driver: A driver whose browser, context and page are ready.
        """
        logging.debug(f"Driver.getInstance called with browser_config: {browser_config}")

        # Serialize construction so concurrent Playwright start-ups don't race.
        async with Driver.__lock:
            driver = Driver(browser_config=browser_config)
            await driver.create_browser(browser_config=browser_config)
            return driver

    def __init__(self, browser_config=None, *args, **kwargs):
        # Each call constructs an independent browser driver.
        self._is_closed = False
        self.page = None
        self.browser = None
        self.context = None
        self.playwright = None

    def is_closed(self):
        """Check if the browser instance is closed."""
        return getattr(self, "_is_closed", True)

    async def create_browser(self, browser_config):
        """Create a new Chromium instance, a context and the initial page.

        Args:
            browser_config (dict): Must provide:
                - headless (bool): whether to run the browser headless
                - viewport (dict): {"width": int, "height": int}
                - language (str): locale, e.g. "en-US"

        Returns:
            Page: The newly created page.

        Raises:
            Exception: Re-raised after logging when Playwright start-up,
                browser launch, or context/page creation fails.
        """
        try:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(
                headless=browser_config["headless"],
                args=[
                    "--disable-dev-shm-usage",  # Mitigate shared memory issues in Docker
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-gpu",
                    "--force-device-scale-factor=1",
                    f'--window-size={browser_config["viewport"]["width"]},{browser_config["viewport"]["height"]}',
                ],
            )

            # Create a fresh context using the configured viewport size.
            self.context = await self.browser.new_context(
                viewport={"width": browser_config["viewport"]["width"], "height": browser_config["viewport"]["height"]},
                device_scale_factor=1,
                is_mobile=False,
                locale=browser_config["language"],
            )
            # await self.context.tracing.start(screenshots=True, snapshots=True)
            self.page = await self.context.new_page()
            browser_config["browser"] = "Chromium"
            self.config = browser_config

            logging.debug(f"Browser instance created successfully with config: {browser_config}")
            return self.page

        except Exception:
            logging.error("Failed to create browser instance.", exc_info=True)
            raise

    def get_context(self):
        """Return the current browser context (None before create_browser)."""
        return self.context

    def get_page(self):
        """Return the current page instance (None before create_browser).

        Returns:
            Page: The current page instance.
        """
        return self.page

    async def get_new_page(self):
        """Switch to the most recently opened page in the browser.

        Returns:
            Page: The newest page when extra tabs exist, otherwise the
            current page.
        """
        try:
            pages = self.context.pages
            logging.debug(f"page number: {len(pages)}")
            if len(pages) > 1:
                logging.debug("New page detected.")
                self.page = pages[-1]
            return self.page
        except Exception as e:
            logging.error("Failed to get new page: %s", e, exc_info=True)
            raise

    async def close_browser(self):
        """Close the browser instance and stop Playwright.

        Safe to call on a partially-initialized driver (e.g. when
        create_browser failed before launching the browser): None members
        are simply skipped instead of raising AttributeError.
        """
        try:
            if not self.is_closed():
                if self.browser is not None:
                    await self.browser.close()
                if self.playwright is not None:
                    await self.playwright.stop()
                self._is_closed = True  # mark closed
                logging.debug("Browser instance closed successfully.")
        except Exception:
            logging.error("Failed to close browser instance.", exc_info=True)
            raise
webqa_agent/browser/session.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import uuid
5
+ from typing import Any, Dict, Optional, Union
6
+
7
+ from playwright.async_api import BrowserContext, Page
8
+
9
+ from webqa_agent.browser.config import DEFAULT_CONFIG
10
+
11
+ # Browser creation is now delegated to Driver to ensure a single entry-point.
12
+ from webqa_agent.browser.driver import Driver
13
+
14
+
15
class BrowserSession:
    """Browser session manager for parallel test execution.

    Wraps a Driver (which owns the Playwright browser/context/page) behind a
    unique session id, an asyncio lock for lifecycle operations, and
    async-context-manager entry/exit helpers.
    """

    def __init__(self, session_id: str = None, browser_config: Dict[str, Any] = None):
        # Auto-generate an id so each parallel session is uniquely addressable.
        self.session_id = session_id or str(uuid.uuid4())
        # Caller overrides are layered on top of the shared defaults.
        self.browser_config = {**DEFAULT_CONFIG, **(browser_config or {})}
        self.driver: Optional[Driver] = None
        # Driver will own browser, context, page and playwright instances
        self._playwright = None  # retained only for backward compatibility when needed
        self._is_closed = False
        self._lock = asyncio.Lock()

    async def initialize(self):
        """Initialize browser session.

        Raises:
            RuntimeError: If the session was already closed.
            Exception: Re-raised after cleanup when Driver creation fails.
        """
        async with self._lock:
            if self._is_closed:
                raise RuntimeError("Browser session is closed")

            logging.debug(f"Initializing browser session {self.session_id} with config: {self.browser_config}")

            try:
                # Use Driver as the single browser creation entry-point.
                self.driver = await Driver.getInstance(browser_config=self.browser_config)

                # Keep reference if external code needs direct access (optional)
                self._playwright = self.driver.playwright

                logging.debug(f"Browser session {self.session_id} initialized successfully via Driver")

            except Exception as e:
                logging.error(f"Failed to initialize browser session {self.session_id}: {e}")
                # Best-effort teardown so a failed init does not leak a browser.
                await self._cleanup()
                raise

    async def navigate_to(self, url: str, cookies: Optional[Union[str, list]] = None, **kwargs):
        """Navigate to URL.

        Args:
            url: Target address.
            cookies: Optional cookies as a JSON string, a single dict, or a
                list of dicts; normalized before being added to the context.
            **kwargs: Forwarded to ``page.goto`` (timeout / wait_until get
                defaults of 60000 ms and "domcontentloaded").

        Raises:
            RuntimeError: If the session is closed/uninitialized, or the page
                is blank after navigation.
        """
        if self._is_closed or not self.driver:
            raise RuntimeError("Browser session not initialized or closed")

        logging.debug(f"Session {self.session_id} navigating to: {url}")
        kwargs.setdefault("timeout", 60000)
        kwargs.setdefault("wait_until", "domcontentloaded")

        page = self.driver.get_page()

        # Normalize cookies into list[dict] as required by Playwright.
        if cookies:
            try:
                cookie_list: list
                if isinstance(cookies, str):
                    cookie_list = json.loads(cookies)
                elif isinstance(cookies, dict):
                    cookie_list = [cookies]
                elif isinstance(cookies, (list, tuple)):
                    cookie_list = list(cookies)
                else:
                    raise TypeError("Unsupported cookies type; expected str, dict or list")

                if not isinstance(cookie_list, list):
                    raise ValueError("Parsed cookies is not a list")

                await page.context.add_cookies(cookie_list)
                logging.debug("Cookies added success")
            except Exception as e:
                # Cookie problems are logged but never abort navigation.
                logging.error(f"Failed to add cookies: {e}")

        # Navigate to the target URL and wait until DOM is ready
        try:
            await page.goto(url, **kwargs)
            await page.wait_for_load_state("networkidle", timeout=60000)
            # Heuristic blank-page detection: empty or missing <body> text.
            is_blank = await page.evaluate("!document.body || document.body.innerText.trim().length === 0")
            logging.debug(f"Page content check: is_blank={is_blank}")
        except Exception as e:
            logging.warning(f"Error while page load after navigation: {e}")
            is_blank = False  # Fail open – don't block execution if evaluation fails

        if is_blank:
            raise RuntimeError(f"Page load timeout or blank content after navigation to {url}, Please check the url and try again.")

    def get_page(self) -> Page:
        """Return current page via Driver."""
        if self._is_closed or not self.driver:
            raise RuntimeError("Browser session not initialized or closed")
        return self.driver.get_page()

    def get_context(self) -> BrowserContext:
        """Return the current browser context via Driver."""
        if self._is_closed or not self.driver:
            raise RuntimeError("Browser session not initialized or closed")
        return self.driver.get_context()

    def is_closed(self) -> bool:
        """Check if session is closed."""
        return self._is_closed

    async def _cleanup(self):
        """Internal cleanup method; always clears driver references."""
        try:
            # Delegate cleanup to Driver if available
            if self.driver and not self.driver.is_closed():
                await self.driver.close_browser()

        except Exception as e:
            logging.error(f"Error during cleanup: {e}")
        finally:
            # Drop references even if close failed so the session is inert.
            self.driver = None
            self._playwright = None

    async def close(self):
        """Close browser session. Idempotent: subsequent calls are no-ops."""
        async with self._lock:
            if self._is_closed:
                return

            logging.debug(f"Closing browser session {self.session_id}")
            self._is_closed = True
            await self._cleanup()
            logging.debug(f"Browser session {self.session_id} closed")

    async def __aenter__(self):
        """Async context manager entry."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.close()
141
+
142
+
143
class BrowserSessionManager:
    """Registry that creates, tracks and tears down BrowserSession objects."""

    def __init__(self):
        self.sessions: Dict[str, BrowserSession] = {}
        self._lock = asyncio.Lock()

    async def browser_session(self, browser_config: Dict[str, Any] = None) -> BrowserSession:
        """Build an un-initialized, untracked session for the caller to manage."""
        return BrowserSession(browser_config=browser_config)

    async def create_session(self, browser_config: Dict[str, Any] = None) -> BrowserSession:
        """Build, initialize and register a new session."""
        session = BrowserSession(browser_config=browser_config)
        await session.initialize()

        # Register under the session's own id, guarded by the manager lock.
        async with self._lock:
            self.sessions[session.session_id] = session

        logging.debug(f"Created browser session: {session.session_id}")
        return session

    async def get_session(self, session_id: str) -> Optional[BrowserSession]:
        """Look up a tracked session by its identifier (None when absent)."""
        async with self._lock:
            return self.sessions.get(session_id)

    async def close_session(self, session_id: str):
        """Close a tracked session and remove it from the registry."""
        async with self._lock:
            session = self.sessions.pop(session_id, None)
            if session:
                await session.close()
                logging.debug(f"Closed session: {session_id}")

    async def close_all_sessions(self):
        """Detach every tracked session, then close them concurrently."""
        async with self._lock:
            pending = list(self.sessions.values())
            self.sessions.clear()

        # Close sessions in parallel; exceptions are collected, not raised.
        if pending:
            await asyncio.gather(*[s.close() for s in pending], return_exceptions=True)
            logging.debug(f"Closed {len(pending)} browser sessions")

    def list_sessions(self) -> Dict[str, Dict[str, Any]]:
        """Summarize config and open/closed state for every tracked session."""
        summary: Dict[str, Dict[str, Any]] = {}
        for session_id, session in self.sessions.items():
            summary[session_id] = {
                "browser_config": session.browser_config,
                "is_closed": session.is_closed(),
            }
        return summary
webqa_agent/crawler/__init__.py ADDED
File without changes
webqa_agent/crawler/crawl.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import List
3
+ from urllib.parse import urljoin, urlparse
4
+
5
+ from playwright.async_api import Page
6
+
7
+ from webqa_agent.crawler.deep_crawler import DeepCrawler, ElementKey
8
+
9
+
10
class CrawlHandler:
    """Extract links and clickable elements from web pages."""

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.base_domain = urlparse(base_url).netloc

    async def extract_links(self, page: Page) -> List[str]:
        """Collect anchor/script/link URLs from the page, dropping static assets.

        Args:
            page: The Playwright page to scan.

        Returns:
            De-duplicated absolute URLs that are not scripts, styles, images,
            documents, anchors, mailto: or tel: links.

        Raises:
            Exception: Re-raised after logging when DOM evaluation fails.
        """
        try:
            links = await page.eval_on_selector_all("a", "elements => elements.map(el => el.href)")
            script_links = await page.eval_on_selector_all("script[src]", "elements => elements.map(el => el.src)")
            link_tags = await page.eval_on_selector_all("link[href]", "elements => elements.map(el => el.href)")

            all_links = set(links + script_links + link_tags)

            # NOTE: el.href/el.src are already absolutized by the browser, so
            # the "#" prefix check below is mostly a safety net.
            filtered_links = [
                link
                for link in all_links
                if not (
                    link.endswith(".js")
                    or link.endswith(".css")
                    or link.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg"))
                    or link.endswith((".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx"))
                    or link.startswith("#")
                    or link.startswith("mailto:")
                    or link.startswith("tel:")
                )
            ]

            absolute_links = [urljoin(self.base_url, link) for link in filtered_links]
            return absolute_links

        except Exception as e:
            logging.error(f"Error extracting links: {str(e)}")
            raise

    def _normalize_link(self, link: str) -> str:
        """Normalize a link URL.

        Resolves root-relative paths against base_url; anchor, javascript:,
        mailto: and tel: links normalize to "" (meaning: skip).
        """
        if not link:
            return ""

        # Handle relative URLs
        if link.startswith("/"):
            return urljoin(self.base_url, link)
        elif link.startswith("#"):
            # Skip anchor links
            return ""
        elif link.startswith("javascript:") or link.startswith("mailto:") or link.startswith("tel:"):
            # Skip javascript, mailto and tel links
            return ""

        return link

    def _is_valid_link(self, link: str) -> bool:
        """Check if a link is valid for testing.

        Valid means: http(s) scheme, not a file download, and on the same
        domain as base_url (or schemeless/relative).
        """
        if not link:
            return False

        try:
            parsed = urlparse(link)

            # Must have a scheme (http/https)
            if parsed.scheme not in ["http", "https"]:
                return False

            # Skip file downloads
            if any(link.lower().endswith(ext) for ext in [".pdf", ".doc", ".docx", ".xls", ".xlsx", ".zip", ".rar"]):
                return False

            # Only test links from the same domain (optional - can be configured)
            if parsed.netloc and parsed.netloc != self.base_domain:
                return False

            return True

        except Exception:
            return False

    async def clickable_elements_detection(self, page: Page):
        """Detect clickable elements on the page via DeepCrawler.

        Returns:
            dict: Mapping of element ids to cleaned element attributes.
                Always a dict: an empty mapping is returned on failure
                (previously an empty list, which had an inconsistent type).
        """
        try:
            dp = DeepCrawler(page)
            result = await dp.crawl()
            clickable_elements = result.clean_dict([str(ElementKey.XPATH), str(ElementKey.SELECTOR)])
            return clickable_elements

        except Exception as e:
            logging.error(f"Error detecting clickable elements on {self.base_url}: {str(e)}")
            return {}
webqa_agent/crawler/deep_crawler.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import datetime
3
+ import json
4
+ import time
5
+ import logging
6
+ import re
7
+
8
+ from pathlib import Path
9
+ from playwright.async_api import Page, async_playwright
10
+ from webqa_agent.crawler.dom_tree import DomTreeNode as dtree
11
+ from webqa_agent.crawler.dom_cacher import DomCacher
12
+ from typing import List, Dict, Optional, Any, Tuple, TypedDict, Union, Iterable
13
+ from pydantic import BaseModel, Field
14
+ from enum import Enum
15
+ from itertools import groupby
16
+
17
+
18
+ # ============================================================================
19
+ # UTILITY FUNCTIONS
20
+ # ============================================================================
21
+
22
def get_time() -> str:
    """Return the current local time as a YYYYMMDD_HH_MM_SS string."""
    now = datetime.datetime.now()
    return now.strftime("%Y%m%d_%H_%M_%S")
28
+
29
+
30
+ def _normalize_keys(template: Optional[Iterable[Union[str, "ElementKey"]]]) -> Optional[List[str]]:
31
+ """
32
+ Normalize template keys to string format.
33
+
34
+ Args:
35
+ template: Template containing ElementKey enums or strings.
36
+
37
+ Returns:
38
+ List of normalized string keys, or None if template is None.
39
+ """
40
+ if template is None:
41
+ return None
42
+
43
+ normalized = []
44
+ for key in template:
45
+ try:
46
+ # Handle both Enum and string types
47
+ normalized.append(key.value if hasattr(key, "value") else str(key))
48
+ except Exception:
49
+ normalized.append(str(key))
50
+ return normalized
51
+
52
+
53
+ # ============================================================================
54
+ # ENUMS AND CONSTANTS
55
+ # ============================================================================
56
+
57
class ElementKey(Enum):
    """Enumeration for element attribute keys.

    Values are the literal dictionary keys used in crawled element records,
    so they must stay in sync with the JS extraction payloads.
    """
    NODE = "node"
    TAG_NAME = "tagName"
    CLASS_NAME = "className"
    INNER_TEXT = "innerText"
    ATTRIBUTES = "attributes"
    VIEWPORT = "viewport"
    # Element center coordinates (page space).
    CENTER_X = "center_x"
    CENTER_Y = "center_y"
    # Boolean flags computed during crawling.
    IS_VISIBLE = "isVisible"
    IS_INTERACTIVE = "isInteractive"
    IS_VALID_TEXT = "isValidText"
    IS_TOP_ELEMENT = "isTopElement"
    IS_IN_VIEWPORT = "isInViewport"
    # Locators for replaying actions against the element.
    XPATH = "xpath"
    SELECTOR = "selector"

    def __str__(self) -> str:
        """Return the string representation of the enum value."""
        return self.value
78
+
79
+
80
# Minimal attribute set emitted when callers pass no explicit output template.
DEFAULT_OUTPUT_TEMPLATE = [
    ElementKey.TAG_NAME.value,
    ElementKey.INNER_TEXT.value,
    ElementKey.CENTER_X.value,
    ElementKey.CENTER_Y.value
]
86
+
87
+
88
+ # ============================================================================
89
+ # DATA MODELS
90
+ # ============================================================================
91
+
92
class ElementMap(BaseModel):
    """Dictionary of crawled elements with a filtering/cleansing helper."""

    data: Dict[str, Any] = Field(default_factory=dict)

    def clean(self, output_template: Optional[List[str]] = None) -> Dict[str, Any]:
        """Return a filtered copy of the element map.

        Each element keeps only the attributes named in *output_template*
        (DEFAULT_OUTPUT_TEMPLATE when omitted) whose values are not None;
        the 'class' entry is always stripped out of any 'attributes' dict.
        Elements left with no attributes are dropped entirely.

        Args:
            output_template: Keys (strings or ElementKey members) to retain.

        Returns:
            A new dictionary of cleansed element records.
        """
        template = DEFAULT_OUTPUT_TEMPLATE if output_template is None else output_template
        # Accept ElementKey members as well as plain strings.
        keys = [k.value if hasattr(k, "value") else str(k) for k in template]
        attributes_key = str(ElementKey.ATTRIBUTES)

        cleaned_map: Dict[str, Any] = {}
        for element_id, element in self.data.items():
            entry: Dict[str, Any] = {}
            for key in keys:
                value = element.get(key)
                if value is None:
                    continue
                if key == attributes_key and isinstance(value, dict):
                    # 'class' is noisy styling data; drop it from attributes.
                    value = {name: val for name, val in value.items() if name != "class"}
                entry[key] = value
            # Skip elements that end up with nothing useful.
            if entry:
                cleaned_map[element_id] = entry

        return cleaned_map
147
+
148
+
149
class CrawlResultModel(BaseModel):
    """Crawl output: element tree plus flattened and diff element maps."""

    element_tree: Dict[str, Any] = Field(default_factory=dict)
    flat_element_map: ElementMap = Field(default_factory=ElementMap)
    diff_element_map: ElementMap = Field(default_factory=ElementMap)

    def raw_dict(self) -> Dict[str, Any]:
        """Unfiltered flattened element data with every captured field."""
        return self.flat_element_map.data

    def clean_dict(self, template: Optional[Iterable[Union[str, "ElementKey"]]] = None) -> Dict[str, Any]:
        """Flattened element data with fields filtered through *template*."""
        normalized = _normalize_keys(template)
        return self.flat_element_map.clean(output_template=normalized)

    def diff_dict(self, template: Optional[Iterable[Union[str, "ElementKey"]]] = None) -> Dict[str, Any]:
        """DOM-difference element data with fields filtered through *template*."""
        normalized = _normalize_keys(template)
        return self.diff_element_map.clean(output_template=normalized)

    def to_llm_json(self, template: Optional[Iterable[Union[str, "ElementKey"]]] = None) -> str:
        """Compact JSON of the cleaned element map, suitable for LLM prompts."""
        cleaned = self.clean_dict(template=template)
        return json.dumps(cleaned, ensure_ascii=False, separators=(",", ":"))
170
+
171
+
172
+ # ============================================================================
173
+ # MAIN CRAWLER CLASS
174
+ # ============================================================================
175
+
176
class DeepCrawler:
    """
    A deep crawler for recursively extracting structured element data from web pages.

    This class injects JavaScript payloads into Playwright pages to build hierarchical
    DOM element trees, capturing properties such as visibility, interactivity, and
    positioning. It supports element highlighting for debugging and provides comprehensive
    DOM change detection capabilities.

    Key functionalities:
    - Recursive DOM crawling with structured data extraction
    - Interactive element identification and filtering
    - Visual element highlighting for debugging purposes
    - DOM change detection between crawl operations
    - Screenshot capture and result serialization
    """

    # Class-level constants for file and directory paths
    default_dir = Path(__file__).parent

    # JavaScript injection files
    DETECTOR_JS = default_dir / "js" / "element_detector.js"
    REMOVER_JS = default_dir / "js" / "marker_remover.js"

    # Output directories
    RESULTS_DIR = default_dir / "results"
    SCREENSHOTS_DIR = default_dir / "screenshots"

    def __init__(self, page: Page, depth: int = 0):
        """
        Initialize the DeepCrawler instance.

        Args:
            page: The Playwright Page object to crawl.
            depth: The current crawling depth level.

        Raises:
            ValueError: If page is not a valid Playwright Page object.
        """
        if not isinstance(page, Page):
            raise ValueError("Crawler page must be a Playwright Page object")

        self.page = page
        self.depth = depth
        self.element_tree = None  # Hierarchical element tree structure (raw dict from JS)
        self.dom_cacher = DomCacher()  # DOM change detection manager
        self._cached_element_tree = None  # Cached DomTreeNode root for diff comparison
        self._last_crawl_time = None  # Timestamp of last crawl operation

    # ------------------------------------------------------------------------
    # CORE CRAWLING METHODS
    # ------------------------------------------------------------------------

    async def crawl(
        self,
        page: Optional[Page] = None,
        highlight: bool = False,
        highlight_text: bool = False,
        viewport_only: bool = False,
        include_styles: bool = False,
        cache_dom: bool = False,
    ) -> CrawlResultModel:
        """Inject JavaScript to crawl the page and return structured element
        data.

        This method executes the element detector script in the browser context,
        building a hierarchical representation of the DOM with detailed element
        properties and optional visual highlighting.

        Args:
            page: The Playwright Page to crawl. Defaults to instance page.
            highlight: Whether to visually highlight detected elements.
            highlight_text: Whether to highlight text nodes (requires highlight=True).
            viewport_only: Whether to restrict detection to current viewport.
            include_styles: Whether to include styles in the result.
            cache_dom: Whether to cache the DOM tree for change detection.

        Returns:
            CrawlResultModel containing the structured crawl data.
            On JavaScript failure an empty CrawlResultModel is returned
            (the error is logged, not raised).
        """
        if page is None:
            page = self.page

        try:
            # Build JavaScript payload: set feature flags as window globals,
            # then inline the detector script and invoke its entry point.
            payload = (
                f"(() => {{"
                f"window._highlight = {str(highlight).lower()};"
                f"window._highlightText = {str(highlight_text).lower()};\n"
                f"window._viewportOnly = {str(viewport_only).lower()};\n"
                f"window._includeStyles = {str(include_styles).lower()};\n"
                f"\n{self.read_js(self.DETECTOR_JS)}"
                f"\nreturn buildElementTree();"
                f"}})()"
            )

            # Execute JavaScript and extract results.
            # NOTE(review): assumes buildElementTree() returns a
            # [tree, flatMap] pair — confirm against element_detector.js.
            self.element_tree, flat_elements = await page.evaluate(payload)

            # Create result model with extracted data
            result = CrawlResultModel(
                flat_element_map=ElementMap(data=flat_elements or {}),
                element_tree=self.element_tree or {}
            )

            # Perform DOM change detection if caching is enabled
            if cache_dom and self.element_tree:
                dom_tree = dtree.build_root(self.element_tree)
                self._cached_element_tree = dom_tree
                self._last_crawl_time = time.time()

                # detect_dom_diff also marks new nodes on dom_tree, which
                # extract_interactive_elements(get_new_elems=True) relies on.
                diff_elements = self.dom_cacher.detect_dom_diff(
                    current_tree=dom_tree,
                    current_url=page.url
                )

                logging.debug(f"DOM change result: {diff_elements}")

                result.diff_element_map = ElementMap(data=self.extract_interactive_elements(get_new_elems=True))

            return result

        except Exception as e:
            # Best-effort: a failed injection yields an empty result rather
            # than aborting the caller's flow.
            logging.error(f"JavaScript injection failed during element detection: {e}")
            return CrawlResultModel()

    def extract_interactive_elements(self, get_new_elems: bool = False) -> Dict:
        """
        Extract interactive elements with comprehensive attribute information.

        Filters DOM nodes based on interactivity, visibility, and positioning
        criteria to identify actionable elements on the page.

        Args:
            get_new_elems: Whether to return only newly detected elements
                (requires a prior crawl with cache_dom=True).

        Returns:
            Dictionary mapping element highlight-index IDs (as strings) to
            their attribute dictionaries. Empty dict if no tree is available.
        """
        # Determine data source based on operation mode
        if get_new_elems:
            if not self._cached_element_tree:
                return {}
            root = self._cached_element_tree
        else:
            if not self.element_tree:
                return {}
            root = dtree.build_root(self.element_tree)

        elements = {}

        if root:
            for node in root.pre_iter():
                # Apply basic element filtering criteria: must be interactive,
                # visible, topmost at its center, and have center coordinates.
                if not all([
                    node.isInteractive,
                    node.isVisible,
                    node.isTopElement,
                    node.center_x is not None,
                    node.center_y is not None
                ]):
                    continue

                # Filter for new elements when requested
                if get_new_elems and not node.is_new:
                    continue

                # Validate viewport dimensions
                viewport = node.viewport or {}
                if viewport.get("width") is None or viewport.get("height") is None:
                    continue

                # Build comprehensive element attribute dictionary
                # (innerText capped at 200 chars to bound payload size).
                elements[str(node.highlightIndex)] = {
                    str(ElementKey.TAG_NAME): node.tagName,
                    str(ElementKey.CLASS_NAME): node.className,
                    str(ElementKey.INNER_TEXT): node.innerText[:200],
                    str(ElementKey.ATTRIBUTES): node.attributes,
                    str(ElementKey.VIEWPORT): node.viewport,
                    str(ElementKey.CENTER_X): node.center_x,
                    str(ElementKey.CENTER_Y): node.center_y,
                    str(ElementKey.IS_VISIBLE): node.isVisible,
                    str(ElementKey.IS_INTERACTIVE): node.isInteractive,
                    str(ElementKey.IS_TOP_ELEMENT): node.isTopElement,
                    str(ElementKey.IS_IN_VIEWPORT): node.isInViewport,
                    str(ElementKey.XPATH): node.xpath,
                    str(ElementKey.SELECTOR): node.selector
                }

        return elements

    def get_text(self, fmt: str = "json") -> str:
        """
        Extract and concatenate all text content from the crawled DOM tree.

        This method intelligently filters text content to avoid duplicates and wrapper nodes,
        collecting only meaningful leaf text nodes and deduplicating consecutive identical texts.

        Args:
            fmt: Output format, currently supports "json" (default).
                Note: this parameter is currently unused by the implementation.

        Returns:
            JSON string containing array of extracted text content, or an
            empty string when no element tree is available.
        """

        def _normalize_text(s: str) -> str:
            """Normalize text by collapsing whitespace and trimming."""
            s = re.sub(r'\s+', ' ', s).strip()
            return s

        def _has_text(n) -> bool:
            """Check if a node has meaningful text content."""
            return bool(getattr(n, "innerText", None) and n.innerText.strip())

        def _is_leaf_text_node(n) -> bool:
            """Determine if a node is a leaf text node (no children with text)."""
            children = getattr(n, "children", None) or []
            return not any(_has_text(c) for c in children)

        def _dedupe_consecutive(seq):
            """Remove consecutive duplicate items from sequence."""
            return [k for k, _ in groupby(seq)]

        # Early return if no element tree available
        # NOTE(review): returns "" here rather than "[]"; callers should
        # handle both forms.
        if not self.element_tree:
            return ""

        # Build DOM tree from hierarchical data
        root = dtree.build_root(self.element_tree)
        if root is None:
            return ""

        # Collect only leaf text nodes and skip wrapper nodes
        items = []
        for n in root.pre_iter():
            # Skip nodes without meaningful text
            if not _has_text(n):
                continue

            # For non-leaf nodes, check if they're wrapper nodes
            if not _is_leaf_text_node(n):
                # Skip "wrapper" nodes: parent text identical to any direct child text
                normalized_text = _normalize_text(n.innerText)
                child_texts = [
                    _normalize_text(c.innerText)
                    for c in (n.children or [])
                    if _has_text(c)
                ]
                # Skip if parent text matches any child text (wrapper node)
                if normalized_text in child_texts:
                    continue

            # Add normalized text to collection
            items.append(_normalize_text(n.innerText))

        # Final deduplication: collapse adjacent duplicates
        items = _dedupe_consecutive(items)

        # Return as compact JSON array
        return json.dumps(items, ensure_ascii=False, separators=(",", ":"))

    # ------------------------------------------------------------------------
    # DOM CACHE MANAGEMENT
    # ------------------------------------------------------------------------

    def clear_dom_cache(self) -> None:
        """Clear the DOM change detection cache and reset internal state."""
        self.dom_cacher.clear_cache()
        self._cached_element_tree = None
        self._last_crawl_time = None

    # ------------------------------------------------------------------------
    # UTILITY METHODS
    # ------------------------------------------------------------------------

    @staticmethod
    def read_js(file_path: Path) -> str:
        """
        Read and return the content of a JavaScript file.

        Args:
            file_path: Path to the JavaScript file.

        Returns:
            The content of the JavaScript file as a string.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()

    @staticmethod
    def dump_json(node: Dict[str, Any], path: Path) -> None:
        """
        Serialize a dictionary to a JSON file with proper formatting.

        Parent directories are created as needed.

        Args:
            node: The dictionary to serialize.
            path: The output file path.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(node, f, ensure_ascii=False, indent=2)

    # ------------------------------------------------------------------------
    # VISUAL DEBUGGING METHODS
    # ------------------------------------------------------------------------

    async def remove_marker(self, page: Optional[Page] = None) -> None:
        """
        Remove visual highlight markers from the page.

        Failures are logged and swallowed (best-effort cleanup).

        Args:
            page: The Playwright Page to clean. Defaults to instance page.
        """
        if page is None:
            page = self.page
        try:
            script = self.read_js(self.REMOVER_JS)
            await page.evaluate(script)
        except Exception as e:
            logging.error(f"Failed to remove highlight markers: {e}")

    async def take_screenshot(
        self,
        page: Optional[Page] = None,
        screenshot_path: Optional[str] = None
    ) -> None:
        """
        Capture a full-page screenshot and save it to disk.

        Args:
            page: The Playwright Page to screenshot. Defaults to instance page.
            screenshot_path: Custom path for the screenshot. Auto-generated
                under SCREENSHOTS_DIR if None.
        """
        if page is None:
            page = self.page

        if screenshot_path:
            path = Path(screenshot_path)
        else:
            path = self.SCREENSHOTS_DIR / f"{get_time()}_marker.png"

        path.parent.mkdir(parents=True, exist_ok=True)
        await page.screenshot(path=str(path), full_page=True)
        logging.debug(f"Screenshot saved to {path}")
webqa_agent/crawler/dom_cacher.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Set, Optional, Dict, Any
3
+ from webqa_agent.crawler.dom_tree import DomTreeNode
4
+ import time
5
+
6
+
7
@dataclass
class CachedDomState:
    """
    DOM state cache class.

    Used to store DOM state snapshots for change detection.
    """
    # URL of the page this snapshot was taken from; diffs are only
    # computed between snapshots of the same URL.
    url: str
    # SHA256 hashes of all clickable elements at snapshot time.
    clickable_element_hashes: Set[str]
    # Unix timestamp (time.time()) when the snapshot was recorded.
    timestamp: float
17
+
18
+
19
class DomCacher:
    """DOM change detector.

    Compares successive snapshots of a page's clickable elements and
    reports how many elements appeared or disappeared between crawls.
    """

    def __init__(self):
        self._cached_state: Optional[CachedDomState] = None

    def detect_dom_diff(self,
                        current_tree: 'DomTreeNode',
                        current_url: str) -> Dict[str, Any]:
        """
        Detect DOM changes relative to the previously cached snapshot.

        Args:
            current_tree: Current DOM tree.
            current_url: Current page URL.

        Returns:
            Dict[str, Any]: Change detection results.
        """

        # Hashes of every clickable element in the current tree
        current_hashes = current_tree.get_clickable_elements_hashes()

        summary: Dict[str, Any] = {
            'has_changes': False,
            'new_elements_count': 0,
            'removed_elements_count': 0,
            'total_elements': len(current_hashes),
        }

        previous = self._cached_state
        # A diff is only meaningful against a snapshot of the same URL.
        if previous is not None and previous.url == current_url:
            cached_hashes = previous.clickable_element_hashes

            appeared = current_hashes - cached_hashes
            vanished = cached_hashes - current_hashes

            summary['new_elements_count'] = len(appeared)
            summary['removed_elements_count'] = len(vanished)
            summary['has_changes'] = bool(appeared) or bool(vanished)

            # Flag nodes that were absent from the previous snapshot
            current_tree.mark_new_elements(cached_hashes)

        # Always refresh the cached snapshot with the current state
        self._cached_state = CachedDomState(
            url=current_url,
            clickable_element_hashes=current_hashes,
            timestamp=time.time(),
        )

        return summary

    def clear_cache(self) -> None:
        """Drop the cached snapshot."""
        self._cached_state = None

    def get_cached_state(self) -> Optional[CachedDomState]:
        """
        Return the cached snapshot, or None if nothing has been cached.

        Returns:
            Optional[CachedDomState]: Cached DOM state.
        """
        return self._cached_state
webqa_agent/crawler/dom_tree.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import copy
3
+ import hashlib
4
+ import logging
5
+ from dataclasses import dataclass, field
6
+ from typing import List, Dict, Optional, Any, Set
7
+ from collections import Counter
8
+
9
+
10
@dataclass
class DomTreeNode:
    """
    A data class representing a node in a simplified Document Object Model (DOM) tree.

    This class captures essential information about a DOM element, including its identity,
    attributes, layout, and state (e.g., visibility, interactivity). It also maintains
    the tree structure through parent-child relationships.

    Attributes:
        id (Optional[int]): A unique identifier for the element, generated from HTML.
        highlightIndex (Optional[int]): An index used for highlighting the element on the page.
        tagName (Optional[str]): The HTML tag name of the element (e.g., 'div', 'a').
        className (Optional[str]): The 'class' attribute of the element.
        innerText (str): The trimmed text content of the element.
        element_type (Optional[str]): The 'type' attribute, typically for <input> elements.
        placeholder (Optional[str]): The 'placeholder' attribute of the element.
        attributes (Dict[str, str]): A dictionary of all HTML attributes of the element.
        selector (str): A generated CSS selector for the element.
        xpath (str): A generated XPath for the element.
        viewport (Dict[str, float]): A dictionary containing the element's bounding box relative to the viewport.
        center_x (Optional[float]): The horizontal center coordinate of the element.
        center_y (Optional[float]): The vertical center coordinate of the element.
        isVisible (Optional[bool]): A flag indicating if the element is visible.
        isInteractive (Optional[bool]): A flag indicating if the element is interactive.
        isTopElement (Optional[bool]): A flag indicating if the element is the topmost element at its center.
        isInViewport (Optional[bool]): A flag indicating if the element is within the current viewport.
        parent (Optional['DomTreeNode']): A reference to the parent node in the tree.
        children (List['DomTreeNode']): A list of child nodes.
        depth (int): The depth of the node in the tree (root is at depth 0).
        subtree (Dict[str, Any]): A copy of the raw subtree data from the crawler, if any.
        is_new (Optional[bool]): Change-detection flag; True if the element was not in the cached snapshot.
        element_hash (Optional[str]): Cached SHA256 hash computed by calculate_element_hash().

    """

    # Mapped from original node fields
    id: Optional[int] = None
    highlightIndex: Optional[int] = None
    tagName: Optional[str] = None
    className: Optional[str] = None
    innerText: str = ""
    element_type: Optional[str] = None
    placeholder: Optional[str] = None

    # Attributes converted from a list to a dictionary
    attributes: Dict[str, str] = field(default_factory=dict)

    # Added selector, xpath
    selector: str = ""
    xpath: str = ""

    # Layout information
    viewport: Dict[str, float] = field(default_factory=dict)
    center_x: Optional[float] = None
    center_y: Optional[float] = None

    # boolean flags
    isVisible: Optional[bool] = None
    isInteractive: Optional[bool] = None
    isTopElement: Optional[bool] = None
    isInViewport: Optional[bool] = None

    # Parent node
    parent: Optional['DomTreeNode'] = None
    # Child nodes
    children: List['DomTreeNode'] = field(default_factory=list)
    # Depth
    depth: int = 0
    # Sub DOM tree
    subtree: Dict[str, Any] = field(default_factory=dict)

    def __repr__(self):
        """Returns a string representation of the DomTreeNode."""
        return f"<DomTreeNode id={self.id!r} tag={self.tagName!r} depth={self.depth}>"

    def add_child(self, child: 'DomTreeNode') -> None:
        """
        Adds a child node to self.children and sets its parent and depth.
        """
        child.parent = self
        child.depth = self.depth + 1
        self.children.append(child)

    def find_by_tag(self, tag_name: str) -> List['DomTreeNode']:
        """
        Recursively finds all nodes matching the tag_name.
        """
        matches: List['DomTreeNode'] = []
        if self.tagName == tag_name:
            matches.append(self)
        for c in self.children:
            matches.extend(c.find_by_tag(tag_name))
        return matches

    def find_by_id(self, target_id: int) -> Optional['DomTreeNode']:
        """
        Performs a depth-first search for the first node whose
        highlightIndex == target_id (note: compares highlightIndex,
        not the 'id' field). Returns None if not found.
        """
        if self.highlightIndex == target_id:
            return self

        for c in self.children:
            result = c.find_by_id(target_id)
            if result is not None:
                return result

        return None

    @classmethod
    def build_root(cls, data: Dict[str, Any]) -> 'DomTreeNode':
        """
        Constructs a DomTreeNode tree from a raw dictionary, typically from JSON.

        This class method serves as the primary entry point for creating a tree from
        the data returned by the crawler. It handles cases where the input data might
        not have a single root 'node' by wrapping it in a synthetic root.

        Args:
            data: The raw dictionary representing the DOM subtree.

        Returns:
            The root DomTreeNode of the constructed tree.
        """
        if data.get('node') is None:
            # Wrap root-less data in a synthetic '__root__' node so the
            # builder below always yields a single root.
            # NOTE(review): 'subtree' is [] here while real nodes carry a
            # dict — confirm downstream consumers handle both shapes.
            fake_node = {
                'node': {
                    'id': None,
                    'highlightIndex': None,
                    'tagName': '__root__',
                    'className': None,
                    'innerText': '',
                    'type': None,
                    'placeholder': None,
                    'attributes': [],
                    'selector': None,
                    'xpath': None,
                    'viewport': {},
                    'center_x': None,
                    'center_y': None,
                    'isVisible': True,
                    'isInteractive': False,
                    'isTopElement': False,
                    'isInViewport': True
                },
                'children': [data],
                'subtree': []
            }

            data = fake_node

        def build_dom_tree(data: Dict[str, Any],
                           parent: Optional['DomTreeNode'] = None,
                           depth: int = 0) -> List['DomTreeNode']:
            """
            Builds a list of DomTreeNode from the injected JS result (nested dict).
            Returns a list of top-level (or multi-root) nodes.
            """
            nodes: List[DomTreeNode] = []
            node_data = data.get('node')
            children_data = data.get('children', [])
            subtree_data = copy.deepcopy(data.get("subtree", {}))

            if node_data:
                # JS side emits attributes as [{'name': ..., 'value': ...}];
                # convert to a plain dict.
                attrs = {a['name']: a['value'] for a in node_data.get('attributes', [])}

                node = cls(
                    id=node_data.get('id'),
                    highlightIndex=node_data.get('highlightIndex'),
                    tagName=(node_data.get('tagName') or '').lower() or None,
                    className=node_data.get('className'),
                    innerText=(node_data.get('innerText') or '').strip(),
                    element_type=node_data.get('type'),
                    placeholder=node_data.get('placeholder'),

                    attributes=attrs,
                    selector=node_data.get('selector'),
                    xpath=node_data.get('xpath'),
                    viewport=node_data.get('viewport', {}),
                    center_x=node_data.get('center_x'),
                    center_y=node_data.get('center_y'),

                    isVisible=node_data.get('isVisible'),
                    isInteractive=node_data.get('isInteractive'),
                    isTopElement=node_data.get('isTopElement'),
                    isInViewport=node_data.get('isInViewport'),

                    subtree=subtree_data,
                    parent=parent,
                    depth=depth
                )

                for cd in children_data:
                    for child in build_dom_tree(cd, parent=node, depth=depth + 1):
                        node.add_child(child)

                nodes.append(node)

            else:
                # No node payload at this level: hoist its children up.
                for cd in children_data:
                    nodes.extend(build_dom_tree(cd, parent=parent, depth=depth))

            return nodes

        roots = build_dom_tree(data)

        # A single root is guaranteed by the synthetic-root wrapping above.
        return roots[0]

    def pre_iter(self) -> List['DomTreeNode']:
        """Performs a pre-order traversal and returns a list of nodes."""
        nodes = [self]
        for c in self.children:
            nodes.extend(c.pre_iter())
        return nodes

    def post_iter(self) -> List['DomTreeNode']:
        """Performs a post-order traversal and returns a list of nodes."""
        nodes: List['DomTreeNode'] = []
        for c in self.children:
            nodes.extend(c.post_iter())
        nodes.append(self)
        return nodes

    def count_depth(self) -> Dict[int, int]:
        """Counts the number of nodes at each depth level."""
        counts = Counter(n.depth for n in self.pre_iter())
        return dict(counts)

    # Change detection related fields
    is_new: Optional[bool] = None  # Mark if element is new
    element_hash: Optional[str] = None  # Element hash value

    def calculate_element_hash(self) -> str:
        """
        Calculate unique hash value for the element.

        Hash is generated based on:
        - Parent path
        - XPath
        (attribute-based hashing is present but commented out below)

        Returns:
            str: SHA256 hash value of the element.
        """
        # Get parent path
        parent_path = self._get_parent_branch_path()
        parent_path_str = '/'.join(parent_path)

        # Get attributes string
        # attrs_str = ''.join(f'{k}={v}' for k, v in sorted(self.attributes.items()))

        # Combine hash source
        # hash_source = f"{parent_path_str}|{attrs_str}|{self.xpath}"
        hash_source = f"{parent_path_str}|{self.xpath}"
        # logging.debug(f"hash_source of elem {self.highlightIndex} ({self.innerText}):\nparent_path_str: {parent_path_str}\nxpath: {self.xpath}")

        # Calculate SHA256 hash (cached on the instance as a side effect)
        self.element_hash = hashlib.sha256(hash_source.encode()).hexdigest()
        return self.element_hash

    def _get_parent_branch_path(self) -> List[str]:
        """
        Get parent path from root node to current node.

        Returns:
            List[str]: List of tag names from just below the root down to
            (and including) this node.
        """
        path = []
        current = self
        while current.parent is not None:
            path.append(current.tagName or '')
            current = current.parent
        path.reverse()
        return path

    def get_clickable_elements(self) -> List['DomTreeNode']:
        """
        Get all clickable elements.

        Returns:
            List[DomTreeNode]: List of clickable elements.
        """
        clickable_elements = []

        # Check whether the current node is clickable
        if (self.isInteractive and
            self.isVisible and
            self.isTopElement and
            self.highlightIndex is not None):
            clickable_elements.append(self)

        # Recursively check child nodes
        for child in self.children:
            clickable_elements.extend(child.get_clickable_elements())

        return clickable_elements

    def get_clickable_elements_hashes(self) -> Set[str]:
        """
        Get hash set of all clickable elements.

        Returns:
            Set[str]: Hash set of clickable elements.
        """
        clickable_elements = self.get_clickable_elements()
        return {elem.calculate_element_hash() for elem in clickable_elements}

    def find_element_by_hash(self, target_hash: str) -> Optional['DomTreeNode']:
        """
        Find element by hash value.

        Args:
            target_hash: Target element hash value.

        Returns:
            Optional[DomTreeNode]: Found element node, None if not found.
        """
        if self.calculate_element_hash() == target_hash:
            return self

        for child in self.children:
            result = child.find_element_by_hash(target_hash)
            if result is not None:
                return result

        return None

    def mark_new_elements(self, cached_hashes: Set[str]) -> None:
        """
        Mark newly appeared elements.

        Sets is_new on every clickable node in the subtree: True when its
        hash is absent from cached_hashes.

        Args:
            cached_hashes: Cached element hash set.
        """
        # Mark the current element
        if (self.isInteractive and
            self.isVisible and
            self.isTopElement and
            self.highlightIndex is not None):
            current_hash = self.calculate_element_hash()
            self.is_new = current_hash not in cached_hashes

        # Recursively mark child elements
        for child in self.children:
            child.mark_new_elements(cached_hashes)
webqa_agent/crawler/js/element_detector.js ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is modified from:
2
+ // https://github.com/browser-use/browser-use/browser_use/dom/dom_tree/index.js
3
+ //
4
+ // Copyright (c) 2024 Gregor Zunic
5
+ //
6
+ // Licensed under the MIT License
7
+
8
+ /**
9
+ * DOM Element Detection and Highlighting System
10
+ *
11
+ * This module provides comprehensive functionality for detecting, analyzing, and highlighting
12
+ * interactive elements and meaningful text content within web pages. It includes:
13
+ *
14
+ * - Interactive element detection with heuristic analysis
15
+ * - Text element validation and extraction
16
+ * - Visual highlighting with overlay rendering
17
+ * - DOM tree construction with filtering capabilities
18
+ * - Viewport-aware element processing
19
+ * - Event listener detection and cursor analysis
20
+ *
21
+ * Key Features:
22
+ * - Supports both interactive element and text content highlighting modes
23
+ * - Handles nested elements with distinct interaction boundary detection
24
+ * - Provides robust visibility and top-element checking
25
+ * - Includes performance optimizations with caching mechanisms
26
+ * - Supports iframe and Shadow DOM contexts
27
+ */
28
+
29
+ (function () {
30
+ window._highlight = window._highlight ?? true; // RenderHighlight Switch
31
+ window._highlightText = window._highlightText ?? false; // RenderTextHighlight Switch
32
+ window._viewportOnly = window._viewportOnly ?? false; // Viewport Highlight Only
33
+ let idCounter = 1;
34
+ let highlightIndex = 1;
35
+ const elementToId = new WeakMap();
36
+ const highlightMap = new WeakMap();
37
+ let highlightIdMap = new WeakMap();
38
+ const styleCache = new WeakMap();
39
+ const _elementHighlightColorMap = new WeakMap();
40
+ const INTERACTIVE_TAGS = new Set(['a', 'button', 'input', 'select', 'textarea', 'summary', 'details', 'label', 'option']);
41
+ const INTERACTIVE_ROLES = new Set(['button', 'link', 'menuitem', 'menuitemradio', 'menuitemcheckbox', 'radio', 'checkbox', 'tab', 'switch', 'slider', 'spinbutton', 'combobox', 'searchbox', 'textbox', 'listbox', 'option', 'scrollbar']);
42
+ const palette = ['#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff']; // highlighting colors
43
+ const overlayContainer = document.getElementById('__marker_container__') || (() => { // highlight container
44
+ const c = document.createElement('div');
45
+ c.id = '__marker_container__';
46
+ Object.assign(c.style, {
47
+ position: 'fixed',
48
+ top: '0',
49
+ left: '0',
50
+ width: '100vw',
51
+ height: '100vh',
52
+ pointerEvents: 'none',
53
+ zIndex: '2147483647'
54
+ });
55
+ document.body.appendChild(c);
56
+ return c;
57
+ })();
58
+
59
+ // ============================= Element Information Extraction =============================
60
/**
 * Return a stable unique integer id for a DOM element.
 *
 * Ids are assigned lazily from a monotonically increasing counter and
 * remembered in the `elementToId` WeakMap, so repeated calls for the same
 * element always yield the same number.
 *
 * @param {HTMLElement} elem The HTML element for which to get the ID.
 * @returns {number} The unique integer ID of the element.
 */
function getElementId(elem) {
    let id = elementToId.get(elem);
    if (id === undefined) {
        id = idCounter++;
        elementToId.set(elem, id);
    }
    return id;
}
74
+
75
/**
 * Retrieve the computed style of an element, memoized per element.
 *
 * The result of `window.getComputedStyle` is cached in the `styleCache`
 * WeakMap keyed by the element object itself, so repeated lookups for the
 * same element avoid extra style recalculation.
 *
 * NOTE(review): `CSSStyleDeclaration` is live, but the cache entry is never
 * invalidated — callers assume styles do not change mid-scan.
 *
 * @param {HTMLElement} elem The HTML element to get the style for.
 * @returns {CSSStyleDeclaration} The computed style object.
 */
function getCachedStyle(elem) {
    let style = styleCache.get(elem);
    if (style === undefined) {
        style = window.getComputedStyle(elem);
        styleCache.set(elem, style);
    }
    return style;
}
91
+
92
/**
 * Return a compact subset of computed styles for model consumption.
 *
 * Only runs when the host page opted in via `window._includeStyles`;
 * otherwise (or on any failure) returns `null`.
 *
 * @param {HTMLElement} elem Element whose styles to sample.
 * @returns {object|null} Plain object of selected computed-style values.
 */
function getStyleSubset(elem) {
    if (!window._includeStyles) return null;

    // Property order is preserved in the returned object (insertion order).
    const STYLE_KEYS = [
        // visibility / stacking
        'display', 'visibility', 'opacity', 'position', 'zIndex',
        // overflow & text wrapping
        'overflowX', 'overflowY', 'textOverflow', 'whiteSpace', 'wordBreak', 'wordWrap',
        // typography & color
        'fontSize', 'fontWeight', 'lineHeight', 'color', 'backgroundColor',
        // interactivity hints
        'pointerEvents', 'cursor',
        // layout
        'width', 'height', 'maxWidth', 'maxHeight', 'margin', 'padding',
        // alignment
        'textAlign', 'verticalAlign', 'justifyContent', 'alignItems', 'flexDirection', 'gap',
        // border & background
        'border', 'borderColor', 'borderWidth', 'outline',
        'backgroundImage', 'backgroundSize', 'objectFit',
    ];

    try {
        const computed = getCachedStyle(elem);
        const subset = {};
        for (const key of STYLE_KEYS) {
            subset[key] = computed[key];
        }
        return subset;
    } catch (_e) {
        return null;
    }
}
145
+
146
/**
 * Heuristically decide whether an element looks interactive.
 *
 * Catches elements without explicit interactive markup by combining signals:
 * interactive attributes (role/tabindex/onclick), class names that suggest
 * clickability, placement inside a known interactive container, presence of
 * at least one visible child, and not being a direct child of <body>
 * (top-level wrappers are usually layout containers).
 *
 * @param {HTMLElement} element The element to evaluate.
 * @returns {boolean} `true` if the element appears heuristically interactive.
 */
function isHeuristicallyInteractive(element) {
    if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

    // Cheap early exit: invisible elements are never interactive here.
    if (!isVisible(element)) return false;

    // Attributes that commonly indicate interactivity.
    const hasInteractiveAttributes =
        element.hasAttribute('role')
        || element.hasAttribute('tabindex')
        || element.hasAttribute('onclick')
        || typeof element.onclick === 'function';

    // Class names that suggest the element is clickable.
    const hasInteractiveClass = /\b(btn|clickable|menu|item|entry|link)\b/i.test(element.className || '');

    // Is the element nested in a container known to host interactions?
    const isInKnownContainer = Boolean(
        element.closest('button,a,[role="button"],.menu,.dropdown,.list,.toolbar')
    );

    // Require at least one visible child to skip empty wrappers.
    const hasVisibleChildren = Array.from(element.children).some(isVisible);

    // Direct children of <body> are usually page-level layout, not widgets.
    const isParentBody = element.parentElement && element.parentElement.isSameNode(document.body);

    const looksInteractive = isInteractiveElement(element) || hasInteractiveAttributes || hasInteractiveClass;
    return looksInteractive && hasVisibleChildren && isInKnownContainer && !isParentBody;
}
194
+
195
/**
 * Decide whether an element forms its own, distinct interaction boundary.
 *
 * Used to allow highlighting a nested element even when an ancestor is
 * already highlighted — e.g. a <button> inside a highlighted container.
 * Checks, in order: iframe, inherently interactive tags, interactive ARIA
 * roles, contenteditable, test-automation attributes, explicit click
 * handlers, attached interaction listeners (when introspection is
 * available) or inline event attributes, and finally the clickability
 * heuristic.
 *
 * @param {HTMLElement} element The element to evaluate.
 * @returns {boolean} `true` if the element is a distinct interaction boundary.
 */
function isElementDistinctInteraction(element) {
    if (!element || element.nodeType !== Node.ELEMENT_NODE) {
        return false;
    }

    const tagName = element.tagName.toLowerCase();
    const role = element.getAttribute('role');

    // iframes always form their own boundary.
    if (tagName === 'iframe') return true;

    // Inherently interactive tag.
    if (INTERACTIVE_TAGS.has(tagName)) return true;

    // Interactive ARIA role.
    if (role && INTERACTIVE_ROLES.has(role)) return true;

    // Editable regions accept input directly.
    if (element.isContentEditable || element.getAttribute('contenteditable') === 'true') return true;

    // Test-automation hooks usually mark actionable nodes.
    if (element.hasAttribute('data-testid') || element.hasAttribute('data-cy') || element.hasAttribute('data-test')) return true;

    // Explicit click handler (attribute or property).
    if (element.hasAttribute('onclick') || typeof element.onclick === 'function') return true;

    // Inspect attached listeners when the host exposes introspection.
    try {
        const getEventListenersForNode = element?.ownerDocument?.defaultView?.getEventListenersForNode || window.getEventListenersForNode;
        if (typeof getEventListenersForNode === 'function') {
            const listeners = getEventListenersForNode(element);
            const interactionEvents = ['click', 'mousedown', 'mouseup', 'keydown', 'keyup', 'submit', 'change', 'input', 'focus', 'blur'];
            for (const eventType of interactionEvents) {
                for (const listener of listeners) {
                    if (listener.type === eventType) {
                        return true; // found a common interaction listener
                    }
                }
            }
        }
        // Fallback: inline event attributes (listener introspection is not
        // available inside page.evaluate contexts).
        const commonEventAttrs = ['onmousedown', 'onmouseup', 'onkeydown', 'onkeyup', 'onsubmit', 'onchange', 'oninput', 'onfocus', 'onblur'];
        if (commonEventAttrs.some(attr => element.hasAttribute(attr))) {
            return true;
        }
    } catch (e) {
        // Listener inspection failed; rely on the remaining heuristic.
    }

    // Last resort: element looks clickable by heuristic signals.
    return isHeuristicallyInteractive(element);
}
269
+
270
/**
 * Determine whether an element is interactive.
 *
 * An element counts as interactive when any of the following holds:
 *  1. Its computed cursor is one of the "interactive" cursors (pointer,
 *     grab, resize cursors, …) — and it is not the <html> element.
 *  2. It is an inherently interactive tag (<a>, <button>, <input>, …) that
 *     is not disabled/readonly/inert and does not show a blocking cursor.
 *  3. It is content-editable.
 *  4. It carries dropdown-style hooks (class "button"/"dropdown-toggle",
 *     data-index, data-toggle="dropdown", aria-haspopup="true").
 *  5. Its role / aria-role is an interactive ARIA role.
 *  6. It has mouse/interaction event listeners (when listener introspection
 *     is available) or common inline on* handlers.
 *
 * @param {HTMLElement} element The element to evaluate.
 * @returns {boolean} `true` if the element is interactive, otherwise `false`.
 */
function isInteractiveElement(element) {
    if (!element || element.nodeType !== Node.ELEMENT_NODE) {
        return false;
    }

    // Cache the tagName and style lookups.
    const tagName = element.tagName.toLowerCase();
    const style = getCachedStyle(element);

    // Cursors that suggest the element responds to pointer interaction.
    const interactiveCursors = new Set([
        'pointer', 'move', 'text', 'grab', 'grabbing', 'cell', 'copy', 'alias',
        'all-scroll', 'col-resize', 'context-menu', 'crosshair', 'e-resize',
        'ew-resize', 'help', 'n-resize', 'ne-resize', 'nesw-resize', 'ns-resize',
        'nw-resize', 'nwse-resize', 'row-resize', 's-resize', 'se-resize',
        'sw-resize', 'vertical-text', 'w-resize', 'zoom-in', 'zoom-out'
    ]);

    // Cursors that explicitly signal "not interactive right now".
    // (Deliberately does NOT include 'none'/'default'/'auto': potentially
    // clickable elements without a specific cursor are kept.)
    const nonInteractiveCursors = new Set([
        'not-allowed', 'no-drop', 'wait', 'progress', 'initial', 'inherit'
    ]);

    // The <html> element often inherits odd cursors; never treat it as
    // interactive purely because of its cursor.
    if (tagName !== 'html' && interactiveCursors.has(style.cursor)) {
        return true;
    }

    const interactiveElements = new Set([
        'a',        // links
        'button',   // buttons
        'input',    // all input types
        'select',   // dropdown menus
        'textarea', // text areas
        'details',  // expandable details
        'summary',  // clickable part of <details>
        'label',    // form labels (often clickable)
        'option',   // select options
        'optgroup', // option groups
        'fieldset', // form fieldsets
        'legend',   // fieldset legends
    ]);

    // Attributes that explicitly disable interaction.
    // FIX: the original also compared getAttribute(t) against 'true' and ''.
    // Both comparisons are redundant: getAttribute returns null when the
    // attribute is absent, and hasAttribute is already true whenever the
    // value is '' or 'true'.
    const explicitDisableTags = new Set([
        'disabled', // standard disabled attribute
        'readonly', // read-only state
    ]);

    // Inherently interactive tags: interactive unless explicitly blocked.
    if (interactiveElements.has(tagName)) {
        if (nonInteractiveCursors.has(style.cursor)) {
            return false; // cursor signals the control is blocked
        }
        for (const disableTag of explicitDisableTags) {
            if (element.hasAttribute(disableTag)) {
                return false;
            }
        }
        if (element.disabled) return false;  // disabled form control
        if (element.readOnly) return false;  // read-only form control
        if (element.inert) return false;     // inert subtree
        return true;
    }

    const role = element.getAttribute('role');
    const ariaRole = element.getAttribute('aria-role');

    // Content-editable regions accept input.
    if (element.getAttribute('contenteditable') === 'true' || element.isContentEditable) {
        return true;
    }

    // Dropdown-style hooks commonly used by UI toolkits.
    if (element.classList && (
        element.classList.contains('button') ||
        element.classList.contains('dropdown-toggle') ||
        element.getAttribute('data-index') ||
        element.getAttribute('data-toggle') === 'dropdown' ||
        element.getAttribute('aria-haspopup') === 'true'
    )) {
        return true;
    }

    const interactiveRoles = new Set([
        'button', 'link', 'menu', 'menubar', 'menuitem', 'menuitemradio',
        'menuitemcheckbox', 'radio', 'checkbox', 'tab', 'switch', 'slider',
        'spinbutton', 'combobox', 'searchbox', 'textbox', 'listbox', 'option',
        'scrollbar'
    ]);

    // Role / aria-role checks.
    // FIX: the original also tested interactiveElements.has(tagName) here,
    // which is dead code — that case already returned in the branch above.
    if ((role && interactiveRoles.has(role)) || (ariaRole && interactiveRoles.has(ariaRole))) {
        return true;
    }

    // Check for attached event listeners where introspection is available
    // (getEventListeners only exists in DevTools-like contexts; the typeof
    // guard keeps the bare identifier safe elsewhere).
    try {
        if (typeof getEventListeners === 'function') {
            const listeners = getEventListeners(element);
            const mouseEvents = ['click', 'mousedown', 'mouseup', 'dblclick'];
            for (const eventType of mouseEvents) {
                if (listeners[eventType] && listeners[eventType].length > 0) {
                    return true; // found a mouse interaction listener
                }
            }
        }

        const getEventListenersForNode = element?.ownerDocument?.defaultView?.getEventListenersForNode || window.getEventListenersForNode;
        if (typeof getEventListenersForNode === 'function') {
            const listeners = getEventListenersForNode(element);
            const interactionEvents = ['click', 'mousedown', 'mouseup', 'keydown', 'keyup', 'submit', 'change', 'input', 'focus', 'blur'];
            for (const eventType of interactionEvents) {
                for (const listener of listeners) {
                    if (listener.type === eventType) {
                        return true; // found a common interaction listener
                    }
                }
            }
        }

        // Fallback: inline handler attributes/properties (listener
        // introspection is unavailable inside page.evaluate contexts).
        const commonMouseAttrs = ['onclick', 'onmousedown', 'onmouseup', 'ondblclick'];
        for (const attr of commonMouseAttrs) {
            if (element.hasAttribute(attr) || typeof element[attr] === 'function') {
                return true;
            }
        }
    } catch (e) {
        // Listener inspection failed; rely on the checks above.
    }

    return false;
}
507
+
508
/**
 * Validate whether an element is a meaningful text container.
 *
 * Accepts an element only when it is visible, carries non-whitespace text,
 * is not a structural/layout tag, does not cover most of the viewport, and
 * is not interactive (interactive elements are handled by the interactive
 * highlighting pass instead).
 *
 * @param {HTMLElement} element The element to validate.
 * @returns {boolean} `true` if the element is a valid text container.
 */
function isValidTextElement(element) {
    if (!element || element.nodeType !== Node.ELEMENT_NODE) {
        return false;
    }

    // Cache tagName and computed style for performance.
    const tagName = element.tagName.toLowerCase();
    const style = getCachedStyle(element);

    // 1. Must be visible.
    const isHidden =
        style.display === 'none'
        || style.visibility === 'hidden'
        || parseFloat(style.opacity) === 0;
    if (isHidden) return false;

    // 2. Must contain non-whitespace text.
    const text = (element.innerText || element.textContent || '').trim();
    if (!text) return false;

    // 3. Structural containers rarely carry user-relevant text themselves.
    const structuralTags = new Set([
        'html', 'body', 'section', 'header', 'footer', 'main', 'nav', 'article', 'aside', 'template', 'iframe'
    ]);
    if (structuralTags.has(tagName)) return false;

    // 4. Reject containers covering most of the viewport (layout wrappers).
    const rect = element.getBoundingClientRect();
    const viewportArea = window.innerWidth * window.innerHeight;
    const areaRatio = (rect.width * rect.height) / viewportArea;
    if (areaRatio > 0.6) return false; // threshold is tunable

    // 5. Interactive elements are handled by isInteractiveElement instead.
    if (isInteractiveElement(element)) return false;

    // 6. Considered a meaningful text information node.
    return true;
}
564
+
565
/**
 * Check whether an element is the top-most element at representative points.
 *
 * Determines if the element would receive a click at its center (and at four
 * inset corners), which filters out occluded or overlaid elements. When
 * `window._viewportOnly` is false the check is skipped and every element is
 * treated as "on top". Elements inside iframes are considered top by
 * default; elements inside a shadow root are hit-tested within that root.
 *
 * @param {HTMLElement} element The element to check.
 * @returns {boolean} `true` if the element is on top, otherwise `false`.
 */
function isTopElement(element) {
    if (!window._viewportOnly) {
        return true;
    }
    const viewportExpansion = 0;

    // FIX: getClientRects() takes no arguments; the stray `element` argument
    // in the original call was silently ignored by the engine.
    const rects = element.getClientRects();

    if (!rects || rects.length === 0) {
        return false; // no geometry, cannot be top
    }

    // At least one non-empty rect must intersect the (expanded) viewport.
    let isAnyRectInViewport = false;
    for (const rect of rects) {
        if (rect.width > 0 && rect.height > 0 && !(
            rect.bottom < -viewportExpansion ||
            rect.top > window.innerHeight + viewportExpansion ||
            rect.right < -viewportExpansion ||
            rect.left > window.innerWidth + viewportExpansion
        )) {
            isAnyRectInViewport = true;
            break;
        }
    }
    if (!isAnyRectInViewport) {
        return false; // all rects are outside the viewport area
    }

    // Elements inside iframes are considered top by default.
    const doc = element.ownerDocument;
    if (doc !== window.document) {
        return true;
    }

    // Reference geometry: the middle client rect (hoisted — the original
    // recomputed this index expression four times).
    const midRect = rects[Math.floor(rects.length / 2)];

    // Shadow DOM: hit-test within the shadow root's own context.
    const shadowRoot = element.getRootNode();
    if (shadowRoot instanceof ShadowRoot) {
        const centerX = midRect.left + midRect.width / 2;
        const centerY = midRect.top + midRect.height / 2;

        try {
            const topEl = shadowRoot.elementFromPoint(centerX, centerY);
            if (!topEl) return false;

            let current = topEl;
            while (current && current !== shadowRoot) {
                if (current === element) return true;
                current = current.parentElement;
            }
            return false;
        } catch (e) {
            return true; // hit-testing failed; assume visible
        }
    }

    const margin = 10;

    // Check the center plus four inset corners to catch partially covered
    // elements (center alone missed too many cases).
    const checkPoints = [
        { x: midRect.left + midRect.width / 2, y: midRect.top + midRect.height / 2 },
        { x: midRect.left + margin, y: midRect.top + margin },     // top left
        { x: midRect.right - margin, y: midRect.top + margin },    // top right
        { x: midRect.left + margin, y: midRect.bottom - margin },  // bottom left
        { x: midRect.right - margin, y: midRect.bottom - margin }, // bottom right
    ];

    return checkPoints.some(({ x, y }) => {
        try {
            const topEl = document.elementFromPoint(x, y);
            if (!topEl) return false;

            let current = topEl;
            while (current && current !== document.documentElement) {
                if (current === element) return true;
                current = current.parentElement;
            }
            return false;
        } catch (e) {
            return true; // hit-testing failed; assume visible
        }
    });
}
664
+
665
/**
 * Check whether an element is currently visible in the DOM.
 *
 * Visibility requires non-zero layout dimensions and CSS that does not hide
 * the element (`display: none` / `visibility: hidden`).
 *
 * @param {HTMLElement} element The element to check.
 * @returns {boolean} `true` if the element is visible, otherwise `false`.
 */
function isVisible(element) {
    if (element.offsetWidth <= 0 || element.offsetHeight <= 0) {
        return false;
    }
    const style = getComputedStyle(element);
    return style?.visibility !== "hidden" && style?.display !== "none";
}
683
+
684
+ /**
685
+ * Generates a simplified CSS selector for an element.
686
+ *
687
+ * This function creates a selector based on the element's tag name, ID (if available),
688
+ * and class names. It is not guaranteed to be unique but is useful for providing
689
+ * a human-readable identifier.
690
+ *
691
+ * @param {HTMLElement} elem The element for which to generate a selector.
692
+ * @returns {string | null} A CSS selector string, or `null` if the element is invalid.
693
+ */
694
+ function generateSelector(elem) {
695
+ if (!elem) return null;
696
+
697
+ let sel = elem.tagName.toLowerCase();
698
+
699
+ // use id first
700
+ if (elem.id) {
701
+ sel += `#${elem.id}`;
702
+ return sel;
703
+ }
704
+
705
+ // try to get class from classList, fallback to getAttribute if not existed
706
+ let classes = [];
707
+ if (elem.classList && elem.classList.length > 0) {
708
+ classes = Array.from(elem.classList);
709
+ } else {
710
+ const raw = elem.getAttribute('class') || '';
711
+ classes = raw.trim().split(/\s+/).filter(Boolean);
712
+ }
713
+
714
+ if (classes.length > 0) {
715
+ sel += `.${classes.join('.')}`;
716
+ }
717
+
718
+ return sel;
719
+ }
720
+
721
/**
 * Generate a robust XPath for an element.
 *
 * Uses an id-based XPath when available; otherwise walks up the tree
 * building tag[position] segments, where position counts preceding siblings
 * with the same node name.
 *
 * @param {HTMLElement} elem The element for which to generate the XPath.
 * @returns {string} The generated XPath string ('' for non-elements).
 */
function generateXPath(elem) {
    if (!(elem instanceof Element)) return '';
    if (elem.id) return `//*[@id="${elem.id}"]`;

    const segments = [];
    for (let node = elem; node && node.nodeType === Node.ELEMENT_NODE; node = node.parentElement) {
        // 1-based index among same-named preceding siblings.
        let position = 1;
        for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
            if (sib.nodeName === node.nodeName) position++;
        }
        segments.unshift(`${node.nodeName.toLowerCase()}[${position}]`);
    }
    return '/' + segments.join('/');
}
747
+
748
/**
 * Gather comprehensive information about a DOM element.
 *
 * Collects identity, attributes, layout (document coordinates), visibility,
 * interactivity, text, and selector/xpath data for one element. Used to
 * build the DOM tree and decide which elements to highlight.
 *
 * @param {HTMLElement} elem The element to gather information from.
 * @param {boolean} isParentHighlighted Whether an ancestor is highlighted.
 * @returns {object} Detailed information about the element.
 */
function getElementInfo(elem, isParentHighlighted) {
    const rect = elem.getBoundingClientRect();
    const scrollX = window.pageXOffset || document.documentElement.scrollLeft;
    const scrollY = window.pageYOffset || document.documentElement.scrollTop;

    // Text from direct text-node children only (excludes descendant text).
    let directText = '';
    for (const child of elem.childNodes) {
        if (child.nodeType === Node.TEXT_NODE) directText += child.textContent.trim();
    }

    return {
        node: elem,
        tagName: elem.tagName.toLowerCase(),
        className: elem.getAttribute('class') || null,
        type: elem.getAttribute('type') || null,
        placeholder: elem.getAttribute('placeholder') || null,
        innerText: directText || (elem.innerText || elem.value || '').trim(),
        attributes: Array.from(elem.attributes).map(a => ({ name: a.name, value: a.value })),

        // Position in document coordinates (client rect + scroll offset).
        viewport: { x: rect.left + scrollX, y: rect.top + scrollY, width: rect.width, height: rect.height },
        center_x: rect.left + rect.width / 2 + scrollX,
        center_y: rect.top + rect.height / 2 + scrollY,

        isVisible: isVisible(elem),
        isInteractive: isInteractiveElement(elem),
        isValidText: isValidTextElement(elem),
        isTopElement: isTopElement(elem),
        isInViewport: !(rect.bottom < 0 || rect.top > window.innerHeight || rect.right < 0 || rect.left > window.innerWidth),

        isParentHighlighted: isParentHighlighted,
        xpath: generateXPath(elem),
        selector: generateSelector(elem),
        styles: getStyleSubset(elem)
    };
}
794
+
795
+ // ============================= Highlight Element =============================
796
/**
 * Pick a random color from the highlight palette.
 *
 * @returns {string} A hexadecimal color string.
 */
function randomColor() {
    const index = Math.floor(Math.random() * palette.length);
    return palette[index];
}
804
+
805
/**
 * Decide whether an element should be highlighted, and assign its index.
 *
 * Rules:
 *  - Menu containers (role menu/menubar/listbox) always pass the base filter.
 *  - In text mode (`window._highlightText`): element must be visible, on top,
 *    and a valid text node; nested under a highlighted parent it must also be
 *    interactive AND a distinct interaction boundary.
 *  - In interactive mode: element must be visible, on top, and interactive;
 *    nested under a highlighted parent it must be a distinct boundary.
 *  - Passing elements get a stable `highlightIndex` (reused from
 *    `highlightMap` on repeat visits).
 *
 * FIX: the original repeated the `isParentHighlighted &&
 * !isElementDistinctInteraction(elemObj)` guard twice (once inside the
 * mode branch, once unconditionally); the duplicate is collapsed into a
 * single equivalent guard block.
 *
 * @param {object} elemInfo Element info object from `getElementInfo`.
 * @param {HTMLElement} elemObj The actual DOM element.
 * @param {boolean} isParentHighlighted `true` if an ancestor is highlighted.
 * @returns {boolean} `true` if the element should be highlighted.
 */
function handleHighlighting(elemInfo, elemObj, isParentHighlighted) {
    function shouldHighlightElem(nodeInfo) {
        // Menu containers are always highlighted so their items get context.
        const role = elemObj.getAttribute('role');
        const isMenuContainer = role === 'menu' || role === 'menubar' || role === 'listbox';
        if (isMenuContainer) return true;

        if (window._highlightText) {
            return nodeInfo.isVisible && nodeInfo.isTopElement && nodeInfo.isValidText;
        } else {
            return nodeInfo.isVisible && nodeInfo.isTopElement && nodeInfo.isInteractive;
        }
    }

    // 1) Basic filter.
    if (!shouldHighlightElem(elemInfo)) return false;

    // 2) Nested elements: only highlight below an already-highlighted parent
    //    when the element forms its own interaction boundary (and, in text
    //    mode, is itself interactive).
    if (isParentHighlighted) {
        if (window._highlightText && !elemInfo.isInteractive) return false;
        if (!isElementDistinctInteraction(elemObj)) return false;
    }

    // 3) Assign a stable highlight index (reuse on repeat visits).
    if (highlightMap.has(elemObj)) {
        elemInfo.highlightIndex = highlightMap.get(elemObj);
    } else {
        elemInfo.highlightIndex = highlightIndex;
        highlightMap.set(elemObj, highlightIndex);
        highlightIndex += 1;
    }

    return true;
}
857
+
858
/**
 * Renders visual highlights for elements in the processed DOM tree.
 *
 * Iterates the tree and draws colored outline boxes plus an index label on
 * the shared `overlayContainer` for each node carrying element info. The
 * overlay is cleared and fully redrawn on each call, which makes it suitable
 * for re-invocation on scroll/resize.
 *
 * @param {object} tree The root of the element tree to render.
 */
function renderHighlights(tree) {
    // Clearing textContent removes all previously drawn boxes/labels at once.
    overlayContainer.textContent = '';
    (function walk(node) {
        if (!node) return;

        if (node.node) {
            const info = node.node;
            const elem = info.node;
            // Ignore degenerate client rects (< 2px in either dimension).
            const rects = Array.from(elem.getClientRects()).filter(r => r.width >= 2 && r.height >= 2);
            if (rects.length === 0) return;

            // 1. Color: assign a fixed color for each element
            let color = _elementHighlightColorMap.get(elem);
            if (!color) {
                color = randomColor();
                _elementHighlightColorMap.set(elem, color);
            }

            // 2. Draw box for each rect (maintain visual consistency for multi-line/multi-rect elements)
            rects.forEach(r => {
                const box = document.createElement('div');
                Object.assign(box.style, {
                    position: 'fixed',
                    top: `${r.top}px`,
                    left: `${r.left}px`,
                    width: `${r.width}px`,
                    height: `${r.height}px`,
                    outline: `2px dashed ${color}`,
                    boxSizing: 'border-box',
                    pointerEvents: 'none'
                });
                overlayContainer.appendChild(box);
            });

            // 3. Calculate union rect as fallback and external positioning reference
            const union = rects.reduce((acc, r) => {
                if (!acc) {
                    return {
                        top: r.top,
                        left: r.left,
                        right: r.right,
                        bottom: r.bottom
                    };
                }
                return {
                    top: Math.min(acc.top, r.top),
                    left: Math.min(acc.left, r.left),
                    right: Math.max(acc.right, r.right),
                    bottom: Math.max(acc.bottom, r.bottom)
                };
            }, null);
            if (!union) return;

            // 4. Create label (hidden first for measurement)
            const label = document.createElement('div');
            label.textContent = info.highlightIndex;
            Object.assign(label.style, {
                position: 'fixed',
                backgroundColor: color,
                color: '#fff',
                fontSize: '10px',
                padding: '1px 2px',
                borderRadius: '3px',
                pointerEvents: 'none',
                visibility: 'hidden',
                whiteSpace: 'nowrap',
                boxSizing: 'border-box'
            });
            overlayContainer.appendChild(label);
            // Must be in the DOM (even hidden) for getBoundingClientRect to measure it.
            const labelRect = label.getBoundingClientRect();

            // 5. Positioning: prioritize placing in the top-right corner of the first rect, with fallback logic from index.js
            const firstRect = rects[0];
            let labelTop = firstRect.top + 2; // slightly below the internal top
            let labelLeft = firstRect.left + firstRect.width - labelRect.width - 2; // right-aligned

            // If it doesn't fit (first rect is too small), place above the rect, right-aligned
            if (firstRect.width < labelRect.width + 4 || firstRect.height < labelRect.height + 4) {
                labelTop = firstRect.top - labelRect.height - 2;
                labelLeft = firstRect.left + firstRect.width - labelRect.width - 2;
            }

            // Final fallback: if still overflowing or in very crowded scenarios, fallback to union's top-left interior
            if (labelLeft < 0 || labelTop < 0 || labelLeft + labelRect.width > window.innerWidth) {
                // Inside union's top-left
                labelLeft = union.left + 2;
                labelTop = union.top + 2;
            }

            // Clamp to viewport
            labelTop = Math.max(0, Math.min(labelTop, window.innerHeight - labelRect.height));
            labelLeft = Math.max(0, Math.min(labelLeft, window.innerWidth - labelRect.width));

            label.style.left = `${labelLeft}px`;
            label.style.top = `${labelTop}px`;
            label.style.visibility = 'visible';
        }

        node.children.forEach(walk);
    })(tree, false); // NOTE(review): the second argument is unused by walk()
}
968
+
969
+ // ============================= Build Dom Tree =============================
970
/**
 * Recursively builds a structured tree representing the DOM.
 *
 * Starting from `elemObj` (normally `document.body`), gathers per-node info,
 * decides whether the node is highlighted, and recurses into children while
 * propagating whether a highlight was already seen on the path. Only subtrees
 * that contain at least one highlighted node are kept.
 *
 * @param {HTMLElement} elemObj Element to start from.
 * @param {boolean} [wasParentHighlighted=false] Whether an ancestor is highlighted.
 * @returns {object | null} Tree node, or `null` when nothing relevant is below.
 */
function buildTree(elemObj, wasParentHighlighted = false) {
    // 1) collect the element's info record
    const info = getElementInfo(elemObj, wasParentHighlighted);

    // 2) does this node itself earn a highlight index?
    const highlightedHere = handleHighlighting(info, elemObj, wasParentHighlighted);
    const highlightSeen = wasParentHighlighted || highlightedHere;

    // 3) recurse, keeping only non-empty subtrees
    const kept = [];
    for (const child of elemObj.children) {
        const subtree = buildTree(child, highlightSeen);
        if (subtree) kept.push(subtree);
    }

    // 4) keep this node if it is highlighted, or if it shelters highlighted descendants
    if (highlightedHere) {
        highlightIdMap[info.highlightIndex] = info; // map highlightIndex -> element info
        return {node: info, children: kept};
    }
    if (kept.length > 0) {
        return {node: null, children: kept};
    }
    return null;
}
1007
+
1008
+ // ============================= Main Function =============================
1009
/**
 * The main entry point for building and processing the element tree.
 *
 * Resets `highlightIdMap`, builds the tree from `document.body`, and — when
 * `window._highlight` is set — renders overlay highlights and re-renders them
 * on scroll and resize so they stay in sync with the layout.
 *
 * NOTE(review): every invocation registers fresh scroll/resize listeners and
 * never removes earlier ones, so repeated calls accumulate listeners — confirm
 * callers invoke this once per page state.
 *
 * @returns {[object, object]} A tuple of the generated DOM tree and the map of
 *     highlight indices to element info.
 */
window.buildElementTree = function () {
    highlightIdMap = {};
    const tree = buildTree(document.body);

    if (window._highlight) {
        renderHighlights(tree);
        // Capture phase so scrolls inside nested scroll containers also trigger a redraw.
        window.addEventListener('scroll', () => renderHighlights(tree), {passive: true, capture: true});
        window.addEventListener('resize', () => renderHighlights(tree));
    }
    return [tree, highlightIdMap];
}
1029
+ }
1030
+ )();
webqa_agent/crawler/js/marker_remover.js ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
() => {
    // Remove the dedicated marker container, if it exists.
    const container = document.getElementById('__marker_container__');
    if (container) container.remove();

    // Remove every individual marker element.
    for (const markerNode of document.querySelectorAll('.__marker_element__')) {
        markerNode.remove();
    }

    // Drop any leftover marker-specific <style> tags.
    for (const styleNode of document.querySelectorAll('style[data-marker-style]')) {
        styleNode.remove();
    }
}
webqa_agent/crawler/js/text_extractor.js ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ () => {
2
// Basic visibility check: the element must have a minimal box (> 3px in both
// dimensions) and must not be hidden via display/visibility/opacity.
function isVisible(element) {
    if (!element || !element.getBoundingClientRect) return false;
    const box = element.getBoundingClientRect();
    const css = window.getComputedStyle(element);

    if (box.width <= 3 || box.height <= 3) return false;
    if (css.display === 'none' || css.visibility === 'hidden') return false;
    return css.opacity !== '0' && parseFloat(css.opacity) > 0;
}
15
+
16
// Check whether the element is the top-most one at its centre point
// (mirrors the implementation in element_detector.js).
function isTopElement(elem) {
    const rect = elem.getBoundingClientRect();
    // Elements fully outside the viewport are treated as "top" — elementFromPoint
    // cannot probe off-screen coordinates, so this handles the boundary case.
    if (rect.right < 0 || rect.left > window.innerWidth || rect.bottom < 0 || rect.top > window.innerHeight) {
        return true;
    }

    // Centre point of the element's bounding box.
    const cx = rect.left + rect.width / 2;
    const cy = rect.top + rect.height / 2;

    try {
        // Top-most rendered element at the centre point.
        const topEl = document.elementFromPoint(cx, cy);
        let curr = topEl;

        // Walk up from the hit element; reaching `elem` means it (or a child of
        // it) is the visible element at that point.
        while (curr && curr !== document.documentElement) {
            if (curr === elem) return true;
            curr = curr.parentElement;
        }
        return false;
    } catch {
        // If the probe fails for any reason, err on the side of keeping the element.
        return true;
    }
}
43
+
44
// Check whether the element carries real content.
// NOTE(review): this helper does not appear to be referenced elsewhere in this
// script (collectTextElements uses getElementText/isMeaningfulText instead) —
// possibly dead code; confirm before removing.
function hasContent(element) {
    // Text content check: more than one character and not purely numeric.
    const text = element.innerText || '';
    if (text.trim().length > 1 && !/^\d+$/.test(text.trim())) {
        return true;
    }

    // Intrinsically meaningful element types count even without text.
    const tagName = element.tagName.toLowerCase();
    if (['a', 'button', 'input', 'select', 'textarea', 'img',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li'].includes(tagName)) {
        return true;
    }

    // A class name suggests the element may carry semantics.
    if (element.className && typeof element.className === 'string' && element.className.length > 0) {
        return true;
    }

    // Interaction-related attributes also count as content.
    if (element.getAttribute('role') ||
        element.getAttribute('aria-label') ||
        element.onclick ||
        element.getAttribute('onclick') ||
        element.getAttribute('href') ||
        element.getAttribute('tabindex') !== null) {
        return true;
    }

    return false;
}
76
+
77
// Extract an element's text: form controls yield value/placeholder, everything
// else yields innerText, falling back to the element's direct text nodes.
function getElementText(element) {
    const tag = element.tagName.toLowerCase();
    if (tag === 'input' || tag === 'textarea') {
        return element.value || element.placeholder || '';
    }

    // Prefer innerText, which already aggregates visible descendant text.
    const rendered = element.innerText?.trim();
    if (rendered) return rendered;

    // Fallback: concatenate the element's own (direct) text nodes.
    const pieces = [];
    for (const node of element.childNodes) {
        if (node.nodeType === Node.TEXT_NODE) {
            const trimmed = node.textContent.trim();
            if (trimmed) pieces.push(trimmed);
        }
    }
    return pieces.join(' ');
}
101
+
102
// A text string is "meaningful" unless it is purely numeric (likely pagination
// or list numbering) or shorter than three characters.
function isMeaningfulText(text) {
    const digitsOnly = /^[0-9]+$/.test(text);
    return !digitsOnly && text.length >= 3;
}
116
+
117
// Collect text info for every visible, top-most element under rootElement.
function collectTextElements(rootElement) {
    const textElements = [];
    const processedTexts = new Set(); // de-duplicates identical strings

    function processElement(element) {
        // Only consider elements that are visible and top-most at their centre point.
        if (!element || !isVisible(element) || !isTopElement(element)) return;

        // Text for the current element (includes descendant text via innerText).
        const text = getElementText(element);

        // If this element carries meaningful, unseen text, record it and stop
        // descending — innerText already covers the children's text.
        if (text && isMeaningfulText(text) && !processedTexts.has(text)) {
            const rect = element.getBoundingClientRect();
            processedTexts.add(text);

            textElements.push({
                text,
                tag: element.tagName.toLowerCase(),
                position: {
                    x: Math.round(rect.left),
                    y: Math.round(rect.top),
                    width: Math.round(rect.width),
                    height: Math.round(rect.height)
                }
            });

            // Skip children to avoid duplicated text.
            return;
        }

        // Otherwise recurse into children looking for text further down.
        for (const child of element.children) {
            processElement(child);
        }
    }

    processElement(rootElement);
    return textElements;
}
158
+
159
// Entry point: gather page metadata plus all visible text elements.
function extractPageContent() {
    // Page-level metadata: title, URL and viewport/document dimensions.
    const metadata = {
        title: document.title,
        url: window.location.href,
        size: {
            width: window.innerWidth,
            height: window.innerHeight,
            scrollHeight: document.documentElement.scrollHeight
        }
    };

    // All deduplicated visible text elements on the page.
    const textElements = collectTextElements(document.body);

    return {metadata, textElements};
}
180
+
181
+ return extractPageContent();
182
+ }
webqa_agent/data/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .test_structures import (
2
+ ParallelTestSession,
3
+ TestConfiguration,
4
+ TestExecutionContext,
5
+ TestResult,
6
+ TestStatus,
7
+ TestType,
8
+ get_default_test_name,
9
+ )
10
+
11
+ __all__ = ["TestType", "TestStatus", "TestConfiguration", "TestExecutionContext", "TestResult", "ParallelTestSession", "get_default_test_name"]
webqa_agent/data/test_structures.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from enum import Enum
3
+ from typing import Any, Dict, List, Optional, Union
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from webqa_agent.browser.config import DEFAULT_CONFIG
8
+
9
+ # 侧边栏标题(默认)
10
+ CATEGORY_TITLES: Dict[str, Dict[str, str]] = {
11
+ "zh-CN": {
12
+ "function": "功能测试",
13
+ "ux": "UX测试",
14
+ "performance": "性能测试",
15
+ "security": "安全测试",
16
+ },
17
+ "en-US": {
18
+ "function": "Function Test",
19
+ "ux": "UX Test",
20
+ "performance": "Performance Test",
21
+ "security": "Security Test",
22
+ }
23
+ }
24
+
25
+
26
class TestCategory(str, Enum):
    """Top-level category used to group test results in the report."""

    FUNCTION = "function"
    UX = "ux"
    SECURITY = "security"
    PERFORMANCE = "performance"
31
+
32
# Test types
class TestType(str, Enum):
    """Test type enumeration.

    Commented-out members mirror runners that are currently disabled in the
    executor; they are kept for reference.
    """

    UNKNOWN = "unknown"
    BASIC_TEST = "basic_test"
    # BUTTON_TEST = "button_test"
    UI_AGENT_LANGGRAPH = "ui_agent_langgraph"
    UX_TEST = "ux_test"
    PERFORMANCE = "performance_test"
    # WEB_BASIC_CHECK = "web_basic_check"
    SECURITY_TEST = "security_test"
    SEO_TEST = "seo_test"
45
+
46
def get_category_for_test_type(test_type: TestType) -> TestCategory:
    """Return the report category a given test type belongs to.

    Types without an explicit mapping (including UNKNOWN and SEO_TEST)
    default to the function category.
    """
    if test_type == TestType.UX_TEST:
        return TestCategory.UX
    if test_type == TestType.PERFORMANCE:
        return TestCategory.PERFORMANCE
    if test_type == TestType.SECURITY_TEST:
        return TestCategory.SECURITY
    # UI_AGENT_LANGGRAPH, BASIC_TEST, UNKNOWN and anything unmapped fall through.
    return TestCategory.FUNCTION
59
+
60
+
61
+ # 报告子标题栏
62
+ TEST_TYPE_DEFAULT_NAMES: Dict[str, Dict[TestType, str]] = {
63
+ "zh-CN": {
64
+ TestType.UI_AGENT_LANGGRAPH: "智能功能测试",
65
+ TestType.BASIC_TEST: "遍历测试",
66
+ # TestType.BUTTON_TEST: "功能测试",
67
+ # TestType.WEB_BASIC_CHECK: "技术健康度检查",
68
+ TestType.UX_TEST: "用户体验测试",
69
+ TestType.PERFORMANCE: "性能测试",
70
+ TestType.SECURITY_TEST: "安全测试",
71
+ },
72
+ "en-US": {
73
+ TestType.UI_AGENT_LANGGRAPH: "AI Function Test",
74
+ TestType.BASIC_TEST: "Basic Function Test",
75
+ # TestType.BUTTON_TEST: "Traversal Test",
76
+ # TestType.WEB_BASIC_CHECK: "Technical Health Check",
77
+ TestType.UX_TEST: "UX Test",
78
+ TestType.PERFORMANCE: "Performance Test",
79
+ TestType.SECURITY_TEST: "Security Test",
80
+ }
81
+ }
82
+
83
+
84
def get_default_test_name(test_type: TestType, language: str = "zh-CN") -> str:
    """Return the internal default test name for a given TestType.

    Names are hardcoded and not user-configurable; an unknown language or an
    unmapped type falls back to the raw enum value.
    """
    names_for_language = TEST_TYPE_DEFAULT_NAMES.get(language, {})
    return names_for_language.get(test_type, test_type.value)
90
+
91
+
92
class TestStatus(str, Enum):
    """Test status enumeration.

    PENDING/RUNNING are transient; the remaining values are terminal.
    """

    PENDING = "pending"
    RUNNING = "running"
    PASSED = "passed"
    WARNING = "warning"
    INCOMPLETED = "incompleted"
    FAILED = "failed"
    CANCELLED = "cancelled"
102
+
103
+
104
class TestConfiguration(BaseModel):
    """Test configuration for parallel execution.

    The mutable field defaults ({} / []) rely on pydantic copying defaults
    per instance rather than sharing them (pydantic v1 behavior).
    """

    test_id: Optional[str] = None
    test_type: Optional[TestType] = TestType.BASIC_TEST
    test_name: Optional[str] = ""
    enabled: Optional[bool] = True  # disabled configs are skipped by get_enabled_tests()
    browser_config: Optional[Dict[str, Any]] = DEFAULT_CONFIG
    report_config: Optional[Dict[str, Any]] = {"language": "zh-CN"}
    test_specific_config: Optional[Dict[str, Any]] = {}
    timeout: Optional[int] = 300  # seconds
    retry_count: Optional[int] = 0
    dependencies: Optional[List[str]] = []  # test_ids that must complete first
117
+
118
+
119
class TestExecutionContext(BaseModel):
    """Execution context (lifecycle tracking) for a single test."""

    test_config: TestConfiguration
    session_id: str
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    status: Optional[TestStatus] = TestStatus.PENDING
    error_message: Optional[str] = ""
    retry_attempts: Optional[int] = 0

    def start_execution(self) -> None:
        """Mark test as started: status RUNNING, start_time now (second precision)."""
        self.start_time = datetime.now().replace(microsecond=0)
        self.status = TestStatus.RUNNING

    def complete_execution(self, success: bool = True, error_message: str = "") -> None:
        """Mark test as completed, PASSED or FAILED depending on ``success``."""
        self.end_time = datetime.now().replace(microsecond=0)
        self.status = TestStatus.PASSED if success else TestStatus.FAILED
        self.error_message = error_message

    @property
    def duration(self) -> Optional[float]:
        """Get execution duration in seconds, or None until both timestamps exist."""
        if self.start_time and self.end_time:
            return (self.end_time - self.start_time).total_seconds()
        return None
147
+
148
+
149
class SubTestScreenshot(BaseModel):
    """Single screenshot attached to a sub-test step."""

    type: str  # screenshot kind label; semantics defined by the producer
    data: str  # base64 encoded image data
152
+
153
+
154
class SubTestAction(BaseModel):
    """Single action performed within a sub-test step."""

    description: Optional[str]  # NOTE: no default -> field is required (but may be None)
    index: int
    success: bool
158
+
159
+
160
class SubTestStep(BaseModel):
    """One executed step of a sub test, with its screenshots and actions."""

    id: int
    screenshots: Optional[List[SubTestScreenshot]] = []
    modelIO: Optional[str] = ""  # presumably raw LLM input/output for this step — confirm with producers
    actions: Optional[List[SubTestAction]] = []
    description: Optional[str] = ""
    status: Optional[TestStatus] = TestStatus.PASSED
    errors: Optional[str] = ""
168
+
169
+
170
class SubTestReport(BaseModel):
    """Human-readable finding produced by a sub test."""

    title: str
    issues: str
173
+
174
+
175
class SubTestResult(BaseModel):
    """Fine-grained result for a sub test / test case.

    TODO: Update type of `messages`
    """

    name: str
    status: Optional[TestStatus] = TestStatus.PENDING
    metrics: Optional[Dict[str, Any]] = {}
    steps: Optional[List[SubTestStep]] = []  # Detailed execution steps
    messages: Optional[Dict[str, Any]] = {}  # Browser monitoring data
    start_time: Optional[str] = None  # stored as a string; format decided by producers
    end_time: Optional[str] = None
    final_summary: Optional[str] = ""
    report: Optional[List[SubTestReport]] = []
190
+
191
+
192
class TestResult(BaseModel):
    """Isolated result data for one configured test."""

    test_id: Optional[str] = ""
    test_type: Optional[TestType] = TestType.UNKNOWN
    test_name: Optional[str] = ""
    module_name: Optional[str] = ""
    status: Optional[TestStatus] = TestStatus.PENDING
    # New field to indicate test category (function/ui/performance)

    category: Optional[TestCategory] = TestCategory.FUNCTION
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration: Optional[float] = None

    # Deprecated free-form dict; keep until callers migrated
    results: Optional[Dict[str, Any]] = {}

    # Structured list replacing the old 'results' field
    sub_tests: Optional[List[SubTestResult]] = []

    # Artifacts
    logs: Optional[List[str]] = []
    traces: Optional[List[str]] = []

    # Error information
    error_message: Optional[str] = ""
    error_details: Optional[Dict[str, Any]] = {}

    # Metrics
    metrics: Optional[Dict[str, Union[int, float, str]]] = {}

    def add_log(self, log_path: str) -> None:
        """Record a log file path on this result."""
        self.logs.append(log_path)

    def add_metric(self, key: str, value: Union[int, float, str]) -> None:
        """Record a single named metric on this result."""
        self.metrics[key] = value

    def add_data(self, key: str, value: Any) -> None:
        """Add data to the deprecated free-form ``results`` dict."""
        self.results[key] = value
235
+
236
+
237
class ParallelTestSession(BaseModel):
    """Session data for parallel test execution.

    Holds the test configurations, per-test execution contexts/results, and
    session-level metadata plus aggregated reporting artifacts.
    """

    session_id: Optional[str] = None
    target_url: Optional[str] = ""
    llm_config: Optional[Dict[str, Any]] = {}

    # Test configurations
    test_configurations: Optional[List[TestConfiguration]] = []

    # Execution tracking (keyed by test_id)
    test_contexts: Optional[Dict[str, TestExecutionContext]] = {}
    test_results: Optional[Dict[str, TestResult]] = {}

    # Session metadata
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None

    # Aggregated results
    aggregated_results: Optional[Dict[str, Any]] = {}
    llm_summary: Optional[str] = ""
    report_path: Optional[str] = ""
    html_report_path: Optional[str] = ""

    def add_test_configuration(self, test_config: TestConfiguration) -> None:
        """Register a configuration and initialize its context and pending result."""
        self.test_configurations.append(test_config)

        # Create execution context
        context = TestExecutionContext(test_config=test_config, session_id=self.session_id)
        self.test_contexts[test_config.test_id] = context

        # Initialize result
        result = TestResult(
            test_id=test_config.test_id,
            test_type=test_config.test_type,
            test_name=test_config.test_name,
            status=TestStatus.PENDING,
            category=get_category_for_test_type(test_config.test_type),
        )
        self.test_results[test_config.test_id] = result

    def start_session(self) -> None:
        """Start the test session."""
        self.start_time = datetime.now()

    def complete_session(self) -> None:
        """Complete the test session."""
        self.end_time = datetime.now()

    def update_test_result(self, test_id: str, result: TestResult) -> None:
        """Update (or insert) the result for a test."""
        self.test_results[test_id] = result

    def get_test_by_type(self, test_type: TestType) -> List[TestConfiguration]:
        """Get all tests of a specific type."""
        return [config for config in self.test_configurations if config.test_type == test_type]

    def get_enabled_tests(self) -> List[TestConfiguration]:
        """Get all enabled test configurations."""
        return [config for config in self.test_configurations if config.enabled]

    def get_summary_stats(self) -> Dict[str, Any]:
        """Get session summary statistics (timestamps truncated to seconds)."""
        return {
            "session_id": self.session_id,
            "target_url": self.target_url,
            "start_time": self.start_time.replace(microsecond=0).isoformat() if self.start_time else None,
            "end_time": self.end_time.replace(microsecond=0).isoformat() if self.end_time else None,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert session to dictionary with grouped test results.

        Results are grouped per TestCategory under ``<category>_test_results``
        keys. Robustness fixes: ``language`` is defaulted up front so a session
        holding results but no configurations cannot hit an unbound name, and
        an unrecognised language falls back to zh-CN titles instead of raising
        a KeyError.
        """
        grouped_results: Dict[str, Dict[str, Any]] = {}

        # Default language, overridden by the first configuration's report_config.
        language = "zh-CN"
        if self.test_configurations and len(self.test_configurations) > 0:
            language = (self.test_configurations[0].report_config or {}).get("language", "zh-CN")
        titles = CATEGORY_TITLES.get(language, CATEGORY_TITLES["zh-CN"])

        # Pre-create one bucket per known category so empty categories still appear.
        for cat in TestCategory:
            key = f"{cat.value}_test_results"
            grouped_results[key] = {"title": titles.get(cat.value, cat.name), "items": []}

        for result in self.test_results.values():
            key = f"{result.category.value}_test_results"
            if key not in grouped_results:
                grouped_results[key] = {
                    "title": titles.get(result.category.value, result.category.name.title()),
                    "items": [],
                }
            grouped_results[key]["items"].append(result.dict())

        return {
            "session_info": self.get_summary_stats(),
            "aggregated_results": self.aggregated_results,
            "test_results": grouped_results,
            "llm_summary": self.llm_summary,
        }
webqa_agent/executor/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from .parallel_executor import ParallelTestExecutor
from .parallel_mode import ParallelMode
from .result_aggregator import ResultAggregator
from .test_runners import (
    BasicTestRunner,
    LighthouseTestRunner,
    UIAgentLangGraphRunner,
    UXTestRunner,
)

# Public API of the executor package.
# Fix: "WebBasicCheckRunner" was listed in __all__ but never imported above,
# so `from webqa_agent.executor import *` raised AttributeError on the
# missing name; it has been removed (the corresponding runner is disabled).
__all__ = [
    "ParallelMode",
    "ParallelTestExecutor",
    "BasicTestRunner",
    "UIAgentLangGraphRunner",
    "UXTestRunner",
    "LighthouseTestRunner",
    "ResultAggregator",
]
webqa_agent/executor/parallel_executor.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import os
4
+ from typing import Dict, List, Optional
5
+
6
+ # Session ID constants
7
+ SECURITY_TEST_NO_SESSION_ID = "security_test_no_session"
8
+
9
+ from webqa_agent.browser.session import BrowserSessionManager
10
+ from webqa_agent.data import ParallelTestSession, TestConfiguration, TestResult, TestStatus, TestType
11
+ from webqa_agent.data.test_structures import get_category_for_test_type
12
+ from webqa_agent.executor.result_aggregator import ResultAggregator
13
+ from webqa_agent.executor.test_runners import (
14
+ BasicTestRunner,
15
+ LighthouseTestRunner,
16
+ SecurityTestRunner,
17
+ UIAgentLangGraphRunner,
18
+ UXTestRunner,
19
+ )
20
+ from webqa_agent.utils.log_icon import icon
21
+
22
+
23
+ class ParallelTestExecutor:
24
+ """Parallel test execution manager."""
25
+
26
    def __init__(self, max_concurrent_tests: int = 4):
        """Create an executor.

        Args:
            max_concurrent_tests: Upper bound on tests running concurrently
                within a batch (enforced via an asyncio.Semaphore).
        """
        self.max_concurrent_tests = max_concurrent_tests
        self.session_manager = BrowserSessionManager()

        # Test runners mapping: each TestType dispatches to the runner below.
        # Commented-out entries correspond to currently disabled test types.
        self.test_runners = {
            TestType.UI_AGENT_LANGGRAPH: UIAgentLangGraphRunner(),
            TestType.UX_TEST: UXTestRunner(),
            TestType.PERFORMANCE: LighthouseTestRunner(),
            TestType.BASIC_TEST: BasicTestRunner(),
            # TestType.WEB_BASIC_CHECK: WebBasicCheckRunner(),
            # TestType.BUTTON_TEST: ButtonTestRunner(),
            TestType.SECURITY_TEST: SecurityTestRunner(),
        }

        # Execution tracking: in-flight tasks and finished results, keyed by test_id.
        self.running_tests: Dict[str, asyncio.Task] = {}
        self.completed_tests: Dict[str, TestResult] = {}
44
+
45
    async def execute_parallel_tests(self, test_session: ParallelTestSession) -> ParallelTestSession:
        """Execute tests in parallel with proper isolation.

        Args:
            test_session: Session containing test configurations

        Returns:
            Updated session with results (also mutated in place)

        Raises:
            asyncio.CancelledError: re-raised after finalization on cancel.
        """
        logging.debug(f"Starting parallel test execution for session: {test_session.session_id}")
        test_session.start_session()

        try:
            # Get enabled tests
            enabled_tests = test_session.get_enabled_tests()
            if not enabled_tests:
                logging.warning("No enabled tests found")
                return test_session

            # Execute tests in batches to respect concurrency limits
            await self._execute_tests_in_batches(test_session, enabled_tests)

            test_session.complete_session()
        except asyncio.CancelledError:
            # Re-raise so the caller observes cancellation; the finally block
            # below still produces a partial report first.
            logging.warning("Parallel test execution cancelled – generating partial report.")
            raise
        except Exception as e:
            logging.error(f"Error in parallel test execution: {e}")
            raise
        finally:
            # Consolidated cleanup, aggregation, and report generation
            await self._finalize_session(test_session)

        return test_session
79
+
80
    async def _execute_tests_in_batches(
        self, test_session: ParallelTestSession, enabled_tests: List[TestConfiguration]
    ):
        """Execute tests in concurrent batches.

        Batches come from dependency resolution; each batch runs its tests
        concurrently under a semaphore. Failed/cancelled tasks are converted
        to FAILED/CANCELLED TestResults; a cancellation is re-raised only
        after the batch's partial results have been recorded.
        """

        # Resolve dependencies and create execution order
        execution_batches = self._resolve_test_dependencies(enabled_tests)
        # Get report_config from the first test configuration if available
        report_config = None
        if test_session.test_configurations:
            report_config = test_session.test_configurations[0].report_config
        # NOTE(review): the aggregator is (re)created on every call of this method.
        self.result_aggregator = ResultAggregator(report_config)

        for batch_idx, test_batch in enumerate(execution_batches):
            logging.debug(f"Executing batch {batch_idx + 1}/{len(execution_batches)} with {len(test_batch)} tests")

            # Create semaphore for this batch
            semaphore = asyncio.Semaphore(min(self.max_concurrent_tests, len(test_batch)))

            # Create tasks for this batch
            batch_tasks = []
            for test_config in test_batch:
                task = asyncio.create_task(self._execute_single_test(test_session, test_config, semaphore))
                batch_tasks.append(task)
                self.running_tests[test_config.test_id] = task

            # Wait for batch completion
            try:
                try:
                    # return_exceptions=True -> per-test exceptions arrive as values.
                    results = await asyncio.gather(*batch_tasks, return_exceptions=True)
                except asyncio.CancelledError:
                    logging.warning("Batch was cancelled – collecting completed task results.")

                    # Salvage whatever finished before the cancellation.
                    results = []
                    for task in batch_tasks:
                        if task.done():
                            try:
                                results.append(task.result())
                            except Exception as e:
                                results.append(e)
                        else:
                            # Task not finished (still cancelled/pending)
                            results.append(asyncio.CancelledError())
                    cancelled_in_batch = True
                else:
                    cancelled_in_batch = False

                # Process results (results[i] corresponds to test_batch[i])
                for i, result in enumerate(results):
                    test_config = test_batch[i]
                    if isinstance(result, Exception):
                        if isinstance(result, asyncio.CancelledError):
                            logging.warning(f"Test {test_config.test_name} was cancelled.")
                            cancelled_result = TestResult(
                                test_id=test_config.test_id,
                                test_type=test_config.test_type,
                                test_name=test_config.test_name,
                                status=TestStatus.CANCELLED,
                                category=get_category_for_test_type(test_config.test_type),
                                error_message="Test was cancelled",
                            )
                            test_session.update_test_result(test_config.test_id, cancelled_result)
                        else:
                            logging.error(f"Test {test_config.test_name} failed with exception: {result}")
                            failed_result = TestResult(
                                test_id=test_config.test_id,
                                test_type=test_config.test_type,
                                test_name=test_config.test_name,
                                status=TestStatus.FAILED,
                                category=get_category_for_test_type(test_config.test_type),
                                error_message=str(result),
                            )
                            test_session.update_test_result(test_config.test_id, failed_result)
                    else:
                        test_session.update_test_result(test_config.test_id, result)

            finally:
                # Clean up batch tasks
                for test_config in test_batch:
                    self.running_tests.pop(test_config.test_id, None)

            logging.debug(f"Batch {batch_idx + 1} completed")
            if cancelled_in_batch:
                # Propagate cancellation after processing.
                raise asyncio.CancelledError()
165
+
166
async def _execute_single_test(
    self, test_session: ParallelTestSession, test_config: TestConfiguration, semaphore: asyncio.Semaphore
) -> TestResult:
    """Execute a single test with proper isolation.

    Acquires the batch semaphore, provisions a browser session where the
    test type requires one, dispatches to the matching runner, and always
    tears the session down in the ``finally`` block.

    Args:
        test_session: Parallel session holding shared config and per-test contexts.
        test_config: Configuration of the single test to execute.
        semaphore: Concurrency limiter shared by the current batch.

    Returns:
        The TestResult produced by the runner, or a FAILED/CANCELLED result
        synthesized here when execution raises.
    """

    async with semaphore:
        test_context = test_session.test_contexts[test_config.test_id]
        test_context.start_execution()

        logging.debug(f"Starting test: {test_config.test_name} ({test_config.test_type.value})")

        try:
            if test_config.test_type in [
                TestType.UI_AGENT_LANGGRAPH,
                TestType.UX_TEST,
                TestType.BASIC_TEST
                # TestType.BUTTON_TEST,
                # TestType.WEB_BASIC_CHECK,
            ]:

                # Create isolated browser session
                session = await self.session_manager.create_session(test_config.browser_config)
                test_context.session_id = session.session_id

                # Navigate to target URL
                await session.navigate_to(
                    test_session.target_url, cookies=test_config.test_specific_config.get("cookies", None)
                )

            elif test_config.test_type == TestType.SECURITY_TEST:
                # Security tests don't need browser sessions, use a placeholder
                session = None
                test_context.session_id = SECURITY_TEST_NO_SESSION_ID

            else:
                # NOTE(review): other test types call `browser_session` rather than
                # `create_session` — presumably a shared/cached session; confirm
                # against the session manager implementation.
                session = await self.session_manager.browser_session(test_config.browser_config)
                test_context.session_id = session.session_id

            # Get appropriate test runner
            runner = self.test_runners.get(test_config.test_type)
            if not runner:
                raise ValueError(f"No runner available for test type: {test_config.test_type}")

            # Execute test
            result = await runner.run_test(
                session=session,
                test_config=test_config,
                llm_config=test_session.llm_config,
                target_url=test_session.target_url,
            )

            # Mark execution outcome according to the returned result status.
            is_success = result.status == TestStatus.PASSED
            test_context.complete_execution(
                success=is_success, error_message=result.error_message if not is_success else ""
            )
            # Propagate the context's timing onto the returned result object.
            result.start_time = test_context.start_time
            result.end_time = test_context.end_time
            result.duration = test_context.duration

            logging.debug(f"Test completed successfully: {test_config.test_name}")
            return result

        except Exception as e:
            error_msg = f"Test execution failed: {str(e)}"
            test_context.complete_execution(success=False, error_message=error_msg)

            # Create failed result
            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.FAILED,
                category=get_category_for_test_type(test_config.test_type),
                start_time=test_context.start_time,
                end_time=test_context.end_time,
                duration=test_context.duration,
                error_message=error_msg,
            )
            return result

        except asyncio.CancelledError:
            # The task was cancelled (e.g., by cancel_test / KeyboardInterrupt).
            # CancelledError derives from BaseException (Python 3.8+), so the
            # `except Exception` branch above does not swallow it.
            logging.warning(f"Test cancelled: {test_config.test_name}")

            test_context.complete_execution(success=False, error_message="Test was cancelled")

            cancelled_result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.CANCELLED,
                category=get_category_for_test_type(test_config.test_type),
                start_time=test_context.start_time,
                end_time=test_context.end_time,
                duration=test_context.duration,
                error_message="Test was cancelled",
            )

            return cancelled_result

        finally:
            # Clean up browser session (security tests never opened one).
            if test_context.session_id and test_context.session_id != SECURITY_TEST_NO_SESSION_ID:
                await self.session_manager.close_session(test_context.session_id)
271
+
272
def _resolve_test_dependencies(self, tests: List[TestConfiguration]) -> List[List[TestConfiguration]]:
    """Resolve test dependencies and return execution batches.

    Tests without dependencies are batched first; dependent tests follow
    in later batches. Each batch holds at most ``max_concurrent_tests``
    entries and can run fully in parallel.

    Returns:
        List of test batches where each batch can run in parallel
    """
    # Partition by the presence of declared dependencies (used for login flows).
    without_deps = [t for t in tests if not t.dependencies]
    with_deps = [t for t in tests if t.dependencies]

    size = self.max_concurrent_tests

    def chunked(items):
        # Slice into runs of at most `size` elements; empty input → no batches.
        return [items[pos : pos + size] for pos in range(0, len(items), size)]

    return chunked(without_deps) + chunked(with_deps)
298
+
299
async def cancel_test(self, test_id: str):
    """Cancel a running test by ID; unknown IDs are ignored."""
    task = self.running_tests.get(test_id)
    if task is not None:
        task.cancel()
        logging.debug(f"Test cancelled: {test_id}")
305
+
306
async def cancel_all_tests(self):
    """Cancel every running test, then close all browser sessions."""
    # Snapshot the keys: cancellation may mutate running_tests concurrently.
    for pending_id in list(self.running_tests.keys()):
        await self.cancel_test(pending_id)

    await self.session_manager.close_all_sessions()
    logging.debug("All tests cancelled")
313
+
314
def get_running_tests(self) -> List[str]:
    """Get list of currently running test IDs."""
    # Return a snapshot copy so callers can iterate while tests finish.
    return [*self.running_tests]
317
+
318
def get_test_status(self, test_id: str) -> Optional[TestStatus]:
    """Get status of a specific test.

    Returns RUNNING for in-flight tests, the recorded status for completed
    ones, and None for unknown IDs.
    """
    if test_id in self.running_tests:
        return TestStatus.RUNNING
    if test_id in self.completed_tests:
        return self.completed_tests[test_id].status
    return None
325
+
326
async def _finalize_session(self, test_session: ParallelTestSession):
    """Close sessions, aggregate results, and generate reports for the given session.

    This helper consolidates cleanup and report generation logic to avoid duplication
    across normal completion, cancellation, and error paths.

    Args:
        test_session: The session to aggregate and report on; its
            ``aggregated_results``, ``report_path`` and ``html_report_path``
            fields are populated in place.
    """
    # Ensure all browser sessions are closed
    await self.session_manager.close_all_sessions()

    # Aggregate results
    aggregated_results = await self.result_aggregator.aggregate_results(test_session)
    test_session.aggregated_results = aggregated_results

    # Generate JSON & HTML reports (HTML is written next to the JSON file)
    report_path = await self.result_aggregator.generate_json_report(test_session)
    test_session.report_path = report_path

    report_dir = os.path.dirname(report_path)
    html_path = self.result_aggregator.generate_html_report_fully_inlined(
        test_session, report_dir=report_dir
    )
    test_session.html_report_path = html_path

    logging.debug(f"Report generated: {report_path}")
    logging.debug(f"HTML report generated: {html_path}")

    # Mark session as completed if not already done
    if test_session.end_time is None:
        test_session.complete_session()
webqa_agent/executor/parallel_mode.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import uuid
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional, Tuple, Coroutine
6
+
7
+ from webqa_agent.browser.config import DEFAULT_CONFIG
8
+ from webqa_agent.data import ParallelTestSession, TestConfiguration, TestType, get_default_test_name
9
+ from webqa_agent.executor import ParallelTestExecutor
10
+ from webqa_agent.utils import Display
11
+ from webqa_agent.utils.get_log import GetLog
12
+ from webqa_agent.utils.log_icon import icon
13
+
14
class ParallelMode:
    """Parallel test mode - runs tests concurrently with data isolation."""

    def __init__(self, tests: List, max_concurrent_tests: int = 4):
        """Create a parallel mode driver.

        Args:
            tests: Legacy test objects. BUG FIX: the original accepted this
                argument and silently dropped it; it is now kept for reference.
                Execution itself is driven by the configurations passed to run().
            max_concurrent_tests: Upper bound on concurrently running tests.
        """
        self.tests = tests
        self.max_concurrent_tests = max_concurrent_tests
        self.executor = ParallelTestExecutor(max_concurrent_tests)

    async def run(
        self,
        url: str,
        llm_config: Dict[str, Any],
        browser_config: Optional[Dict[str, Any]] = None,
        test_configurations: Optional[List[Dict[str, Any]]] = None,
        log_cfg: Optional[Dict[str, Any]] = None,
        report_cfg: Optional[Dict[str, Any]] = None,
    ) -> Tuple[Dict[str, Any], str, str, Dict[str, Any]]:
        """Run tests in parallel mode with configurable test types.

        Args:
            url: Target URL to test
            llm_config: Configuration for language models
            browser_config: Default browser configuration
            test_configurations: Custom test configurations for parallel execution
            log_cfg: Configuration for logger (expects key ``level``)
            report_cfg: Configuration for report (expects key ``language``)

        Returns:
            Tuple of (aggregated_results, report_path, html_report_path,
            count_summary). BUG FIX: the original annotation declared a
            2-tuple which did not match the actual 4-tuple return value.
        """
        try:
            # BUG FIX: log_cfg / report_cfg are Optional but were subscripted
            # unconditionally, raising TypeError when omitted. Normalize first.
            # TODO(review): confirm "info" matches GetLog's intended default level.
            log_cfg = log_cfg or {"level": "info"}
            report_cfg = report_cfg or {"language": "zh-CN"}

            GetLog.get_log(log_level=log_cfg["level"])
            Display.init(language=report_cfg["language"])
            Display.display.start()

            logging.info(f"{icon['rocket']} Starting tests for URL: {url}, parallel mode {self.max_concurrent_tests}")

            # Use default config if none provided
            if not browser_config:
                browser_config = DEFAULT_CONFIG.copy()

            # Create test session
            test_session = ParallelTestSession(session_id=str(uuid.uuid4()), target_url=url, llm_config=llm_config)

            # Use a fresh per-task timestamp for reports and keep logs separate
            report_ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f")
            os.environ["WEBQA_REPORT_TIMESTAMP"] = report_ts

            # Configure tests based on input or legacy test objects
            if test_configurations:
                self._configure_tests_from_config(test_session, test_configurations, browser_config, report_cfg)

            # Execute tests in parallel
            completed_session = await self.executor.execute_parallel_tests(test_session)

            result = completed_session.aggregated_results.get("count", {})

            await Display.display.stop()
            Display.display.render_summary()
            # Return results in format compatible with existing code
            return (
                completed_session.aggregated_results,
                completed_session.report_path,
                completed_session.html_report_path,
                result,
            )

        except Exception as e:
            logging.error(f"Error in parallel mode: {e}")
            raise

    def _configure_tests_from_config(
        self,
        test_session: ParallelTestSession,
        test_configurations: List[Dict[str, Any]],
        default_browser_config: Dict[str, Any],
        report_cfg: Dict[str, Any],
    ):
        """Configure tests from provided configuration.

        Each entry may override the browser config and supply per-test
        options; missing fields fall back to sensible defaults.
        """
        for config in test_configurations:
            test_type_str = config.get("test_type", "basic_test")

            # Map string to TestType enum
            test_type = self._map_test_type(test_type_str)

            # Merge browser config (per-test settings win over defaults)
            browser_config = {**default_browser_config, **config.get("browser_config", {})}

            test_config = TestConfiguration(
                test_id=str(uuid.uuid4()),
                test_type=test_type,
                # Robustness: tolerate a report_cfg without "language".
                test_name=get_default_test_name(test_type, report_cfg.get("language", "zh-CN")),
                enabled=config.get("enabled", True),
                browser_config=browser_config,
                report_config=report_cfg,
                test_specific_config=config.get("test_specific_config", {}),
                timeout=config.get("timeout", 300),
                retry_count=config.get("retry_count", 0),
                dependencies=config.get("dependencies", []),
            )

            test_session.add_test_configuration(test_config)

    def _map_test_type(self, test_type_str: str) -> TestType:
        """Map string to TestType enum; unknown strings fall back to BASIC_TEST."""
        mapping = {
            "ui_agent_langgraph": TestType.UI_AGENT_LANGGRAPH,
            "ux_test": TestType.UX_TEST,
            "performance": TestType.PERFORMANCE,
            "basic_test": TestType.BASIC_TEST,
            # "web_basic_check": TestType.WEB_BASIC_CHECK,
            # "button_test": TestType.BUTTON_TEST,
            "security": TestType.SECURITY_TEST,
            "security_test": TestType.SECURITY_TEST,
        }

        return mapping.get(test_type_str, TestType.BASIC_TEST)
webqa_agent/executor/result_aggregator.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from webqa_agent.data import ParallelTestSession, TestStatus
8
+ from webqa_agent.llm.llm_api import LLMAPI
9
+ from webqa_agent.utils import i18n
10
+
11
+ class ResultAggregator:
12
+ """Aggregates and analyzes parallel test results"""
13
+
14
+ def __init__(self, report_config: dict = None):
15
+ """Initialize ResultAggregator with language support.
16
+
17
+ Args:
18
+ report_config: Configuration dictionary containing language settings
19
+ """
20
+ self.language = report_config.get("language", "zh-CN") if report_config else "zh-CN"
21
+ self.localized_strings = {
22
+ 'zh-CN': i18n.get_lang_data('zh-CN').get('aggregator', {}),
23
+ 'en-US': i18n.get_lang_data('en-US').get('aggregator', {}),
24
+ }
25
+
26
+ def _get_text(self, key: str) -> str:
27
+ """Get localized text for the given key."""
28
+ return self.localized_strings.get(self.language, {}).get(key, key)
29
+
30
+ async def aggregate_results(self, test_session: ParallelTestSession) -> Dict[str, Any]:
31
+ """Aggregate all test results into a comprehensive summary.
32
+
33
+ Args:
34
+ test_session: Session containing all test results
35
+
36
+ Returns:
37
+ Aggregated results dictionary
38
+ """
39
+ logging.debug(f"Aggregating results for session: {test_session.session_id}")
40
+ issues = []
41
+ error_message = await self._get_error_message(test_session)
42
+ # Generate issue list (LLM powered when possible)
43
+ llm_issues = await self._generate_llm_issues(test_session)
44
+
45
+ issues.extend(error_message)
46
+ issues.extend(llm_issues)
47
+ logging.info(f"Aggregated {len(test_session.test_results)} test results, found {len(issues)} issues")
48
+ for test_id, result in test_session.test_results.items():
49
+ sub_tests_count = len(result.sub_tests or [])
50
+ logging.debug(f"Test {test_id} has {sub_tests_count} sub_tests")
51
+ if result.sub_tests:
52
+ for i, sub_test in enumerate(result.sub_tests):
53
+ logging.debug(f"Sub-test {i}: status={sub_test.status}")
54
+
55
+ total_sub_tests = sum(len(r.sub_tests or []) for r in test_session.test_results.values())
56
+ passed_sub_tests = sum(
57
+ 1
58
+ for r in test_session.test_results.values()
59
+ for sub in (r.sub_tests or [])
60
+ if sub.status == TestStatus.PASSED
61
+ )
62
+ critical_sub_tests = total_sub_tests - passed_sub_tests # 未通过即视为关键问题
63
+
64
+ logging.debug(f"Debug: total_sub_tests={total_sub_tests}, passed_sub_tests={passed_sub_tests}, critical_sub_tests={critical_sub_tests}")
65
+
66
+ # Build content for executive summary tab
67
+ executive_content = {
68
+ "executiveSummary": "",
69
+ "statistics": [
70
+ {"label": self._get_text('assessment_categories'), "value": str(total_sub_tests), "colorClass": "var(--warning-color)"},
71
+ {"label": self._get_text('passed_count'), "value": str(passed_sub_tests), "colorClass": "var(--success-color)"},
72
+ {"label": self._get_text('failed_count'), "value": str(critical_sub_tests), "colorClass": "var(--failure-color)"},
73
+ ]
74
+ }
75
+
76
+ aggregated_results_list = [
77
+ {"id": "subtab-summary-advice", "title": self._get_text('summary_and_advice'), "content": executive_content},
78
+ {
79
+ "id": "subtab-issue-tracker",
80
+ "title": self._get_text('issue_list'),
81
+ "content": {
82
+ "title": self._get_text('issue_tracker_list'),
83
+ "note": self._get_text('issue_list_note'),
84
+ "issues": issues,
85
+ },
86
+ },
87
+ ]
88
+
89
+ # Store additional raw analysis for LLM etc.
90
+ raw_analysis = {
91
+ "session_summary": test_session.get_summary_stats(),
92
+ }
93
+
94
+ def dict_to_text(d, indent=0):
95
+ lines = []
96
+ for k, v in d.items():
97
+ if isinstance(v, dict):
98
+ lines.append(" " * indent + f"{k}:")
99
+ lines.append(dict_to_text(v, indent + 2))
100
+ else:
101
+ lines.append(" " * indent + f"{k}: {v}")
102
+ return "\n".join(lines)
103
+
104
+ executive_content["executiveSummary"] = f"{dict_to_text(raw_analysis['session_summary'])}"
105
+
106
+ # Also expose simple counters at the top-level for easy consumption
107
+ return {
108
+ "title": self._get_text('assessment_overview'),
109
+ "tabs": aggregated_results_list,
110
+ "count":{
111
+ "total": total_sub_tests,
112
+ "passed": passed_sub_tests,
113
+ "failed": critical_sub_tests,
114
+ }
115
+ }
116
+
117
+ async def _generate_llm_issues(self, test_session: ParallelTestSession) -> List[Dict[str, Any]]:
118
+ """Use LLM to summarise issues for each sub-test.
119
+
120
+ Fallback to heuristic if LLM unavailable.
121
+ """
122
+ llm_config = test_session.llm_config or {}
123
+ use_llm = bool(llm_config)
124
+ critical_issues: List[Dict[str, Any]] = []
125
+
126
+ # Prepare LLM client if configured
127
+ llm: Optional[LLMAPI] = None
128
+ if use_llm:
129
+ try:
130
+ llm = LLMAPI(llm_config)
131
+ await llm.initialize()
132
+ except Exception as e:
133
+ logging.error(f"Failed to initialise LLM, falling back to heuristic issue extraction: {e}")
134
+ use_llm = False
135
+
136
+ # Iterate over all tests and their sub-tests
137
+ for test_result in test_session.test_results.values():
138
+ for sub in test_result.sub_tests or []:
139
+ try:
140
+ # Determine severity strictly based on sub-test status
141
+ if sub.status == TestStatus.PASSED:
142
+ continue # No issue for passed sub-tests
143
+ if sub.status == TestStatus.WARNING:
144
+ severity_level = "low"
145
+ elif sub.status == TestStatus.FAILED:
146
+ severity_level = "high"
147
+ else:
148
+ severity_level = "medium"
149
+
150
+ issue_entry = {
151
+ "issue_name": self._get_text('test_failed_prefix') + test_result.test_name,
152
+ "issue_type": test_result.test_type.value,
153
+ "sub_test_name": sub.name,
154
+ "severity": severity_level,
155
+ }
156
+ if use_llm and llm:
157
+ prompt_content = {
158
+ "name": sub.name,
159
+ "status": sub.status,
160
+ "report": sub.report,
161
+ "metrics": sub.metrics,
162
+ "final_summary": sub.final_summary,
163
+ }
164
+ prompt = (
165
+ f"{self._get_text('llm_prompt_main')}\n\n"
166
+ f"{self._get_text('llm_prompt_test_info')}{json.dumps(prompt_content, ensure_ascii=False, default=str)}"
167
+ )
168
+ logging.debug(f"LLM Issue Prompt: {prompt}")
169
+ llm_response_raw = await llm.get_llm_response("", prompt)
170
+ llm_response = llm._clean_response(llm_response_raw)
171
+ logging.debug(f"LLM Issue Response: {llm_response}")
172
+ try:
173
+ parsed = json.loads(llm_response)
174
+ issue_count = parsed.get("issue_count", parsed.get("count", 1))
175
+ if issue_count == 0:
176
+ continue
177
+ issue_text = parsed.get("issues", "").strip()
178
+ if not issue_text:
179
+ continue
180
+ llm_severity = parsed.get("severity", severity_level)
181
+ issue_entry["severity"] = llm_severity
182
+ issue_entry["issues"] = issue_text
183
+ issue_entry["issue_count"] = issue_count
184
+ except Exception as parse_err:
185
+ logging.error(f"Failed to parse LLM JSON: {parse_err}; raw: {llm_response}")
186
+ continue # skip if cannot parse
187
+ else:
188
+ # Heuristic fallback – use final_summary to detect issue presence
189
+ summary_text = (sub.final_summary or "").strip()
190
+ if not summary_text:
191
+ continue
192
+ lowered = summary_text.lower()
193
+ if any(k in lowered for k in ["error", "fail", "严重", "错误", "崩溃", "无法"]):
194
+ issue_entry["severity"] = "high"
195
+ elif any(k in lowered for k in ["warning", "警告", "建议", "优化", "改进"]):
196
+ issue_entry["severity"] = "low"
197
+ else:
198
+ issue_entry["severity"] = "medium"
199
+ issue_entry["issues"] = summary_text
200
+ issue_entry["issue_count"] = 1
201
+ # add populated entry
202
+ critical_issues.append(issue_entry)
203
+ except Exception as e:
204
+ logging.error(f"Error while generating issue summary for sub-test {sub.name}: {e}")
205
+ continue # skip problematic sub-test
206
+ # Close LLM client if needed
207
+ if use_llm and llm:
208
+ try:
209
+ await llm.close()
210
+ except Exception as e:
211
+ logging.warning(f"Failed to close LLM client: {e}")
212
+ return critical_issues
213
+
214
+ async def _get_error_message(self, test_session: ParallelTestSession) -> str:
215
+ """Get error message from test session."""
216
+ error_message = []
217
+ for test_result in test_session.test_results.values():
218
+ if test_result.status != TestStatus.PASSED:
219
+ # Only append if error_message is not empty
220
+ if test_result.error_message:
221
+ error_message.append({
222
+ "issue_name": self._get_text('execution_error_prefix') + test_result.test_name,
223
+ "issue_type": test_result.test_type.value,
224
+ "severity": "high",
225
+ "issues": test_result.error_message
226
+ })
227
+ return error_message
228
+
229
+ async def generate_json_report(self, test_session: ParallelTestSession, report_dir: str | None = None) -> str:
230
+ """Generate comprehensive JSON report."""
231
+ try:
232
+ # Determine report directory
233
+ if report_dir is None:
234
+ timestamp = os.getenv("WEBQA_REPORT_TIMESTAMP") or os.getenv("WEBQA_TIMESTAMP")
235
+ report_dir = f"./reports/test_{timestamp}"
236
+ os.makedirs(report_dir, exist_ok=True)
237
+
238
+ json_path = os.path.join(report_dir, "test_results.json")
239
+ with open(json_path, "w", encoding="utf-8") as f:
240
+ json.dump(test_session.to_dict(), f, indent=2, ensure_ascii=False, default=str)
241
+
242
+ absolute_path = os.path.abspath(json_path)
243
+ if os.getenv("DOCKER_ENV"):
244
+ host_path = absolute_path.replace("/app/reports", "./reports")
245
+ logging.debug(f"JSON report generated: {host_path}")
246
+ return host_path
247
+ else:
248
+ logging.debug(f"JSON report generated: {absolute_path}")
249
+ return absolute_path
250
+
251
+ except Exception as e:
252
+ logging.error(f"Failed to generate JSON report: {e}")
253
+ return ""
254
+
255
+ def _get_static_dir(self) -> Path:
256
+ """Resolve the static assets directory in a robust way.
257
+
258
+ This uses the source file location of this module instead of the working
259
+ directory to avoid issues on hosted platforms.
260
+ """
261
+ # __file__ → .../webqa_agent/executor/result_aggregator.py
262
+ # static dir → .../webqa_agent/static
263
+ executor_dir = Path(__file__).resolve().parent
264
+ static_dir = (executor_dir.parent / "static").resolve()
265
+ return static_dir
266
+
267
+ def _read_css_content(self) -> str:
268
+ """Read and return CSS content."""
269
+ try:
270
+ css_path = self._get_static_dir() / "assets" / "style.css"
271
+ if css_path.exists():
272
+ return css_path.read_text(encoding="utf-8")
273
+ except Exception as e:
274
+ logging.warning(f"Failed to read CSS file: {e}")
275
+ return ""
276
+
277
+ def _read_js_content(self) -> str:
278
+ """Read and return JavaScript content based on language."""
279
+ try:
280
+ # Choose JS file based on language
281
+ if self.language == "en-US":
282
+ js_filename = "index_en-US.js"
283
+ else:
284
+ js_filename = "index.js" # Default to Chinese version
285
+
286
+ js_path = self._get_static_dir() / "assets" / js_filename
287
+ if js_path.exists():
288
+ return js_path.read_text(encoding="utf-8")
289
+ else:
290
+ # Fallback to default file if language-specific file doesn't exist
291
+ fallback_path = self._get_static_dir() / "assets" / "index.js"
292
+ if fallback_path.exists():
293
+ logging.warning(f"Language-specific JS file {js_filename} not found, using fallback")
294
+ return fallback_path.read_text(encoding="utf-8")
295
+ except Exception as e:
296
+ logging.warning(f"Failed to read JS file: {e}")
297
+ return ""
298
+
299
+ def generate_html_report_fully_inlined(self, test_session, report_dir: str | None = None) -> str:
300
+ """Generate a fully inlined HTML report for the test session."""
301
+ import re
302
+ import json
303
+ import re
304
+
305
+ try:
306
+ template_file = self._get_static_dir() / "index.html"
307
+
308
+ template_found = template_file.exists()
309
+ if template_found:
310
+ html_template = template_file.read_text(encoding="utf-8")
311
+ else:
312
+ logging.warning(
313
+ f"Report template not found at {template_file}. Falling back to minimal inline template."
314
+ )
315
+
316
+ datajs_content = (
317
+ "window.testResultData = " + json.dumps(test_session.to_dict(), ensure_ascii=False, default=str) + ";"
318
+ )
319
+
320
+ if template_found:
321
+ css_content = self._read_css_content()
322
+ js_content = self._read_js_content()
323
+
324
+ html_out = html_template
325
+ html_out = re.sub(
326
+ r'<link\s+rel="stylesheet"\s+href="/assets/style.css"\s*>',
327
+ lambda m: f"<style>\n{css_content}\n</style>",
328
+ html_out,
329
+ )
330
+ html_out = re.sub(
331
+ r'<script\s+src="/data.js"\s*>\s*</script>',
332
+ lambda m: f"<script>\n{datajs_content}\n</script>",
333
+ html_out,
334
+ )
335
+ html_out = re.sub(
336
+ r'<script\s+type="module"\s+crossorigin\s+src="/assets/index.js"\s*>\s*</script>',
337
+ lambda m: f'<script type="module">\n{js_content}\n</script>',
338
+ html_out,
339
+ )
340
+
341
+ if report_dir is None:
342
+ timestamp = os.getenv("WEBQA_REPORT_TIMESTAMP") or os.getenv("WEBQA_TIMESTAMP")
343
+ report_dir = f"./reports/test_{timestamp}"
344
+ # Ensure report dir exists; if creation fails, fallback to tmp
345
+ try:
346
+ os.makedirs(report_dir, exist_ok=True)
347
+ report_dir_path = Path(report_dir).resolve()
348
+ except Exception as mk_err:
349
+ logging.warning(f"Cannot create report dir '{report_dir}': {mk_err}. Falling back to /tmp/webqa-reports.")
350
+ report_dir_path = Path("/tmp/webqa-reports").resolve()
351
+ report_dir_path.mkdir(parents=True, exist_ok=True)
352
+
353
+ html_path = report_dir_path / "test_report.html"
354
+ html_path.write_text(html_out, encoding="utf-8")
355
+
356
+ absolute_path = str(html_path)
357
+ if os.getenv("DOCKER_ENV"):
358
+ mapped = absolute_path.replace("/app/reports", "./reports")
359
+ logging.debug(f"HTML report generated: {mapped}")
360
+ return mapped
361
+ else:
362
+ logging.debug(f"HTML report generated: {absolute_path}")
363
+ return absolute_path
364
+ except Exception as e:
365
+ logging.error(f"Failed to generate fully inlined HTML report: {e}")
366
+ return ""
webqa_agent/executor/test_runners.py ADDED
@@ -0,0 +1,888 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import time
5
+ from abc import ABC, abstractmethod
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List
9
+
10
+ from webqa_agent.browser.session import BrowserSession
11
+ from webqa_agent.data import TestConfiguration, TestResult, TestStatus
12
+ from webqa_agent.data.test_structures import (SubTestReport, SubTestResult,
13
+ get_category_for_test_type)
14
+ from webqa_agent.testers import (LighthouseMetricsTest, PageButtonTest,
15
+ PageContentTest, PageTextTest,
16
+ WebAccessibilityTest)
17
+ from webqa_agent.utils import Display
18
+ from webqa_agent.utils.log_icon import icon
19
+ from webqa_agent.utils import i18n
20
+
21
+
22
class BaseTestRunner(ABC):
    """Base class for test runners.

    Concrete runners implement ``run_test`` for one TestType and are looked
    up by the parallel executor.
    """

    @abstractmethod
    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Run the test and return results.

        Args:
            session: Browser session for the test (may be None for test
                types that need no browser — see the executor).
            test_config: Configuration of the test to run.
            llm_config: Configuration for language models.
            target_url: URL under test.

        Returns:
            A populated TestResult.
        """
        pass
31
+
32
+
33
class UIAgentLangGraphRunner(BaseTestRunner):
    """Runner for UIAgent LangGraph tests."""

    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Run UIAgent LangGraph test using LangGraph workflow with
        ParallelUITester.

        Drives the LangGraph app over the given browser session, collects
        per-case results from the UITester's report, and aggregates them into
        a single ``TestResult``.

        Args:
            session: Active browser session shared with the UITester.
            test_config: Test configuration (id, name, type, report/specific config).
            llm_config: LLM settings forwarded to the UITester.
            target_url: URL the generated cases are executed against.

        Returns:
            TestResult with one SubTestResult per executed case.

        Raises:
            Exception: re-raised after recording the failure on ``result``.
        """

        with Display.display(test_config.test_name):
            # Imported lazily to avoid the LangGraph dependency at module import time.
            from webqa_agent.testers.case_gen.graph import app as graph_app
            from webqa_agent.testers.function_tester import UITester

            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.RUNNING,
                category=get_category_for_test_type(test_config.test_type),
            )

            parallel_tester: UITester | None = None
            try:
                parallel_tester = UITester(llm_config=llm_config, browser_session=session)
                await parallel_tester.initialize()

                business_objectives = test_config.test_specific_config.get('business_objectives', '')
                logging.info(f"{icon['running']} Running test: {test_config.test_name} with business objectives: {business_objectives}")

                cookies = test_config.test_specific_config.get('cookies')

                # Initial LangGraph state; 'ui_tester_instance' is also passed via
                # graph_config so graph nodes can reach the shared tester.
                initial_state = {
                    'url': target_url,
                    'business_objectives': business_objectives,
                    'cookies': cookies,
                    'completed_cases': [],
                    'reflection_history': [],
                    'remaining_objectives': business_objectives,
                    'ui_tester_instance': parallel_tester,
                    'current_test_case_index': 0,
                    'language': test_config.report_config.get('language', 'zh-CN'),
                }

                graph_config = {'configurable': {'ui_tester_instance': parallel_tester}, 'recursion_limit': 100}

                # Mapping from case name to status obtained from LangGraph aggregate_results
                graph_case_status_map: Dict[str, str] = {}

                # Execute the LangGraph workflow, streaming node outputs as they complete.
                graph_completed = False
                async for event in graph_app.astream(initial_state, config=graph_config):
                    # Each event is a dict where keys are node names and values are their outputs
                    for node_name, node_output in event.items():
                        if node_name == 'aggregate_results':
                            # Capture final report to retrieve authoritative case statuses
                            final_report = node_output.get('final_report', {})
                            for idx, case_res in enumerate(final_report.get('completed_summary', [])):
                                case_name = case_res.get('case_name') or case_res.get('name') or f'Case_{idx + 1}'
                                graph_case_status_map[case_name] = case_res.get('status', 'failed').lower()

                        if node_name == '__end__':
                            logging.debug('Graph execution completed successfully')
                            graph_completed = True
                            break
                        else:
                            logging.debug(f"Node '{node_name}' completed")

                    # Break out of the outer loop if we found __end__
                    if graph_completed:
                        break

                # === Use UITester's data-storage mechanism for result extraction ===
                sub_tests = []
                runner_format_report = {}

                if parallel_tester:
                    # Generate the complete report in the runner's standard format.
                    test_name = f'UI Agent Test - {target_url}'
                    runner_format_report = parallel_tester.generate_runner_format_report(
                        test_id=test_config.test_id, test_name=test_name
                    )

                    sub_tests_data = runner_format_report.get('sub_tests', [])
                    logging.debug(f'Generated runner format report with {len(sub_tests_data)} cases')

                    if not sub_tests_data:
                        logging.warning('No sub_tests data found in runner format report')

                    # Convert runner-format sub_tests into SubTestResult entries.
                    for i, case in enumerate(sub_tests_data):
                        case_name = case.get('name', f"Unnamed test case - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
                        case_steps = case.get('steps', [])

                        # Sanity-check case data completeness before conversion.
                        logging.debug(f"Processing case {i + 1}: '{case_name}' with {len(case_steps)} steps")
                        if not case_steps:
                            logging.warning(f"Case '{case_name}' has no steps data")

                        # Prefer status from graph aggregation if available
                        sub_status = graph_case_status_map.get(case_name, case.get('status', 'failed')).lower()
                        # NOTE: 'completed' deliberately maps to WARNING (finished but
                        # not explicitly passed) — TODO confirm intent with authors.
                        status_mapping = {
                            'pending': TestStatus.PENDING,
                            'running': TestStatus.RUNNING,
                            'passed': TestStatus.PASSED,
                            'completed': TestStatus.WARNING,
                            'failed': TestStatus.FAILED,
                            'cancelled': TestStatus.CANCELLED,
                        }
                        status_enum = status_mapping.get(sub_status, TestStatus.FAILED)

                        sub_tests.append(
                            SubTestResult(
                                name=case_name,
                                status=status_enum,
                                metrics={},
                                steps=case_steps,
                                messages=case.get('messages', {}),
                                start_time=case.get('start_time'),
                                end_time=case.get('end_time'),
                                final_summary=case.get('final_summary', ''),
                                report=case.get('report', []),
                            )
                        )

                    result.sub_tests = sub_tests

                    # Extract aggregate metrics from the runner-format report.
                    results_data = runner_format_report.get('results', {})
                    result.add_metric('test_case_count', results_data.get('total_cases', 0))
                    result.add_metric('passed_test_cases', results_data.get('passed_cases', 0))
                    result.add_metric('failed_test_cases', results_data.get('failed_cases', 0))
                    result.add_metric('total_steps', results_data.get('total_steps', 0))
                    result.add_metric('success_rate', results_data.get('success_rate', 0))

                    # Aggregate network/console diagnostics from each case's messages.
                    total_failed_requests = 0
                    total_requests = 0
                    total_console_errors = 0

                    for case in runner_format_report.get('sub_tests', []):
                        case_messages = case.get('messages', {})
                        if isinstance(case_messages, dict):
                            network_data = case_messages.get('network', {})
                            if isinstance(network_data, dict):
                                failed_requests = network_data.get('failed_requests', [])
                                responses = network_data.get('responses', [])
                                total_failed_requests += len(failed_requests)
                                total_requests += len(responses)

                            console_data = case_messages.get('console', [])
                            if isinstance(console_data, list):
                                total_console_errors += len(console_data)

                    result.add_metric('network_failed_requests_count', total_failed_requests)
                    result.add_metric('network_total_requests_count', total_requests)
                    result.add_metric('console_error_count', total_console_errors)

                    # Derive the overall status from the report-level status.
                    runner_status = runner_format_report.get('status', 'failed')
                    if runner_status == 'completed':
                        result.status = TestStatus.PASSED
                    else:
                        result.status = TestStatus.FAILED
                        result.error_message = runner_format_report.get('error_message', 'Test execution failed')

                else:
                    logging.error('No UITester instance available for data extraction')
                    result.status = TestStatus.FAILED
                    result.error_message = 'No test cases were executed or results were not available'

                logging.info(f"{icon['check']} Test completed: {test_config.test_name}")

            except Exception as e:
                error_msg = f'AI Functional Test failed: {str(e)}'
                result.status = TestStatus.FAILED
                result.error_message = error_msg
                logging.error(error_msg)
                raise

            finally:
                # Cleanup parallel tester
                if parallel_tester:
                    try:
                        # UITester manages its own monitoring data; only resource
                        # cleanup is required here.
                        await parallel_tester.cleanup()
                        logging.debug('UITester cleanup completed')
                    except Exception as e:
                        logging.error(f'Error cleaning up UITester: {e}')

            return result
223
+
224
+
225
class UXTestRunner(BaseTestRunner):
    """Runner for UX tests using parallel-friendly test classes without GetLog
    dependencies."""

    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Run UX tests (page text + page content checks) on the session page.

        Args:
            session: Active browser session providing the page under test.
            test_config: Test configuration (id, name, type, report config).
            llm_config: LLM settings forwarded to the UX testers.
            target_url: URL under test (the session page is used directly).

        Returns:
            TestResult aggregating the content and text sub-test results.

        Raises:
            Exception: re-raised after recording the failure on ``result``.
        """

        with Display.display(test_config.test_name):
            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.RUNNING,
                category=get_category_for_test_type(test_config.test_type),
            )

            try:
                logging.info(f"{icon['running']} Running UX test: {test_config.test_name}")
                page = session.get_page()

                text_test = PageTextTest(llm_config, report_config=test_config.report_config)
                text_result: SubTestResult = await text_test.run(page=page)

                # Run ParallelPageContentTest
                content_test = PageContentTest(llm_config, report_config=test_config.report_config)
                content_results: List[SubTestResult] = await content_test.run(page=page)

                result.sub_tests = content_results + [text_result]

                # Extract statuses from the sub-test results.
                content_statuses = [r.status for r in content_results]
                text_status = text_result.status

                # Determine overall status.
                # FIX: statuses are TestStatus enum members (see status mapping in
                # UIAgentLangGraphRunner and the enum comparison in BasicTestRunner);
                # comparing against the bare string 'passed' would be always-False
                # for a non-str enum, forcing a FAILED overall status.
                if text_status == TestStatus.PASSED and all(
                    status == TestStatus.PASSED for status in content_statuses
                ):
                    result.status = TestStatus.PASSED
                else:
                    result.status = TestStatus.FAILED

                # Collect page-level error messages from all sub-tests.
                all_results = content_results + [text_result]
                errors = [r.messages['page'] for r in all_results if 'page' in r.messages]

                if errors:
                    result.error_message = '; '.join(errors)

                logging.info(f"{icon['check']} Test completed: {test_config.test_name}")

            except Exception as e:
                error_msg = f'UX test failed: {str(e)}'
                result.status = TestStatus.FAILED
                result.error_message = error_msg
                logging.error(error_msg)
                raise

            return result
283
+
284
+
285
class LighthouseTestRunner(BaseTestRunner):
    """Runner for Lighthouse performance audits (Chromium-only)."""

    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Collect Lighthouse metrics for ``target_url``.

        Marks the run INCOMPLETED (not failed) when the session's browser is
        not Chromium, since Lighthouse only supports Chromium.
        """

        with Display.display(test_config.test_name):
            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.RUNNING,
                category=get_category_for_test_type(test_config.test_type),
            )

            try:
                logging.info(f"{icon['running']} Running test: {test_config.test_name}")
                browser_config = session.browser_config

                # Guard: Lighthouse cannot run on non-Chromium engines.
                is_chromium = browser_config.get('browser_type') == 'chromium'
                if not is_chromium:
                    logging.warning('Lighthouse tests require Chromium browser, skipping')
                    result.status = TestStatus.INCOMPLETED
                    result.results = {'skipped': 'Lighthouse requires Chromium browser'}
                    return result

                # Run the audit and adopt its status as the overall status.
                audit = LighthouseMetricsTest(report_config=test_config.report_config)
                audit_outcome: SubTestResult = await audit.run(target_url, browser_config=browser_config)

                result.sub_tests = [audit_outcome]
                result.status = audit_outcome.status
                logging.info(f"{icon['check']} Test completed: {test_config.test_name}")

            except Exception as e:
                error_msg = f'Lighthouse test failed: {str(e)}'
                result.status = TestStatus.FAILED
                result.error_message = error_msg
                logging.error(error_msg)
                raise

            return result
329
+
330
+
331
class BasicTestRunner(BaseTestRunner):
    """Runner for traversal tests: button clicks plus accessibility checks."""

    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Run button traversal and accessibility checks against the page.

        Args:
            session: Active browser session providing the page and browser config.
            test_config: Test configuration (id, name, type, report config).
            llm_config: LLM settings (unused here; kept for the runner interface).
            target_url: URL whose elements and links are crawled and tested.

        Returns:
            TestResult combining the button and accessibility sub-test results.

        Raises:
            Exception: re-raised after recording the failure on ``result``.
        """

        with Display.display(test_config.test_name):
            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.RUNNING,
                category=get_category_for_test_type(test_config.test_type),
            )

            try:
                logging.info(f"{icon['running']} Running test: {test_config.test_name}")
                page = session.get_page()
                browser_config = session.browser_config

                # Discover clickable elements via crawler (imported lazily to
                # keep module import light).
                from webqa_agent.crawler.crawl import CrawlHandler

                crawler = CrawlHandler(target_url)
                clickable_elements = await crawler.clickable_elements_detection(page)
                logging.info(f'Crawled {len(clickable_elements)} clickable elements')
                # Cap the traversal size to keep runtime bounded.
                if len(clickable_elements) > 50:
                    from itertools import islice
                    clickable_elements = dict(islice(clickable_elements.items(), 50))
                    # FIX: was an f-string with no placeholders.
                    logging.warning('Clickable elements number is too large, only keep the first 50')

                button_test = PageButtonTest(report_config=test_config.report_config)
                button_test_result = await button_test.run(
                    target_url, page=page, clickable_elements=clickable_elements, browser_config=browser_config
                )

                # FIX: reuse the existing crawler instead of constructing a second
                # identical CrawlHandler for the same target_url.
                links = await crawler.extract_links(page)
                logging.info(f'Crawled {len(links)} links')
                # WebAccessibilityTest validates the crawled links.
                accessibility_test = WebAccessibilityTest(report_config=test_config.report_config)
                accessibility_result = await accessibility_test.run(target_url, links)

                # Combine test results into a list
                result.sub_tests = [button_test_result, accessibility_result]

                # Extract per-sub-test statuses (defensively treating a missing
                # result as FAILED).
                button_status = button_test_result.status if button_test_result else TestStatus.FAILED
                accessibility_status = accessibility_result.status if accessibility_result else TestStatus.FAILED

                # Overall status: pass only when both sub-tests pass.
                if button_status == TestStatus.PASSED and accessibility_status == TestStatus.PASSED:
                    result.status = TestStatus.PASSED
                else:
                    result.status = TestStatus.FAILED

                # Collect page-level error messages from all sub-tests.
                all_results = [button_test_result, accessibility_result]
                errors = [r.messages.get('page') for r in all_results if r and r.messages and 'page' in r.messages]

                if errors:
                    result.error_message = '; '.join(errors)

                logging.info(f"{icon['check']} Test completed: {test_config.test_name}")

            except Exception as e:
                # FIX: message covers both sub-tests; was misleadingly labeled
                # 'Button test failed'.
                error_msg = f'Basic test failed: {str(e)}'
                result.status = TestStatus.FAILED
                result.error_message = error_msg
                logging.error(error_msg)
                raise

            return result
407
+
408
+ # class ButtonTestRunner(BaseTestRunner):
409
+ # """Runner dedicated to button click tests."""
410
+
411
+ # async def run_test(
412
+ # self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
413
+ # ) -> TestResult:
414
+ # """Run Button test."""
415
+
416
+ # with Display.display(test_config.test_name):
417
+ # result = TestResult(
418
+ # test_id=test_config.test_id,
419
+ # test_type=test_config.test_type,
420
+ # test_name=test_config.test_name,
421
+ # status=TestStatus.RUNNING,
422
+ # category=get_category_for_test_type(test_config.test_type),
423
+ # )
424
+
425
+ # try:
426
+ # logging.info(f"{icon['running']} Running test: {test_config.test_name}")
427
+ # page = session.get_page()
428
+ # browser_config = session.browser_config
429
+
430
+ # # Discover clickable elements via crawler
431
+ # from webqa_agent.crawler.crawl import CrawlHandler
432
+
433
+ # crawler = CrawlHandler(target_url)
434
+ # clickable_elements = await crawler.clickable_elements_detection(page)
435
+ # logging.info(f'Crawled {len(clickable_elements)} clickable elements')
436
+ # if len(clickable_elements) > 50:
437
+ # from itertools import islice
438
+ # clickable_elements = dict(islice(clickable_elements.items(), 50))
439
+ # logging.warning(f'Clickable elements number is too large, only keep the first 50')
440
+
441
+ # button_test = PageButtonTest()
442
+ # button_test_result = await button_test.run(
443
+ # target_url, page=page, clickable_elements=clickable_elements, browser_config=browser_config
444
+ # )
445
+
446
+ # # Second subtest: each clickable result? keep detailed reports if needed; here we only include traverse test
447
+ # result.sub_tests = [button_test_result]
448
+
449
+ # # Overall metrics/status
450
+ # result.status = button_test_result.status
451
+
452
+ # logging.info(f"{icon['check']} Test completed: {test_config.test_name}")
453
+
454
+ # except Exception as e:
455
+ # error_msg = f'Button test failed: {str(e)}'
456
+ # result.status = TestStatus.FAILED
457
+ # result.error_message = error_msg
458
+ # logging.error(error_msg)
459
+ # raise
460
+
461
+ # return result
462
+
463
+
464
+ # class WebBasicCheckRunner(BaseTestRunner):
465
+ # """Runner for Web Basic Check tests."""
466
+
467
+ # async def run_test(
468
+ # self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
469
+ # ) -> TestResult:
470
+ # """Run Web Basic Check tests."""
471
+
472
+ # with Display.display(test_config.test_name):
473
+ # result = TestResult(
474
+ # test_id=test_config.test_id,
475
+ # test_type=test_config.test_type,
476
+ # test_name=test_config.test_name,
477
+ # status=TestStatus.RUNNING,
478
+ # category=get_category_for_test_type(test_config.test_type),
479
+ # )
480
+
481
+ # try:
482
+ # logging.info(f"{icon['running']} Running test: {test_config.test_name}")
483
+ # page = session.get_page()
484
+
485
+ # # Discover page elements
486
+ # from webqa_agent.crawler.crawl import CrawlHandler
487
+
488
+ # crawler = CrawlHandler(target_url)
489
+ # links = await crawler.extract_links(page)
490
+ # logging.info(f'Crawled {len(links)} links')
491
+ # # WebAccessibilityTest
492
+ # accessibility_test = WebAccessibilityTest(self.llm_config, report_config=self.report_config)
493
+ # accessibility_result = await accessibility_test.run(target_url, links)
494
+
495
+ # result.sub_tests = [accessibility_result]
496
+ # result.status = accessibility_result.status
497
+ # logging.info(f"{icon['check']} Test completed: {test_config.test_name}")
498
+
499
+ # except Exception as e:
500
+ # error_msg = f'Web Basic Check test failed: {str(e)}'
501
+ # result.status = TestStatus.FAILED
502
+ # result.error_message = error_msg
503
+ # logging.error(error_msg)
504
+ # raise
505
+
506
+ # return result
507
+
508
class SecurityTestRunner(BaseTestRunner):
    """Runner for Security tests using Nuclei-based scanning.

    Spawns the external ``nuclei`` CLI (must be on PATH) against the target
    URL, runs tag/protocol/severity scans in parallel, merges the JSONL
    findings, and reports them as a single combined sub-test with
    per-severity metrics.
    """

    def __init__(self):
        super().__init__()
        self.language = 'zh-CN'  # Default; overwritten per run from report_config in run_test.
        # Pre-load localized security strings for both supported report languages.
        self.localized_strings = {
            'zh-CN': i18n.get_lang_data('zh-CN').get('testers', {}).get('security', {}),
            'en-US': i18n.get_lang_data('en-US').get('testers', {}).get('security', {}),
        }

    def _get_text(self, key: str) -> str:
        """Get localized text for the current language, falling back to the key itself."""
        return self.localized_strings.get(self.language, {}).get(key, key)

    def get_scan_tags(self, language: str) -> Dict[str, str]:
        """Get Nuclei tag-based scans with localized descriptions.

        ``language`` is retained for interface compatibility; the lookup uses
        ``self.language`` (set in ``run_test``).
        """
        return {
            'cve': self._get_text('cve_scan'),
            'xss': self._get_text('xss_scan'),
            'sqli': self._get_text('sqli_scan'),
            'rce': self._get_text('rce_scan'),
            'lfi': self._get_text('lfi_scan'),
            'ssrf': self._get_text('ssrf_scan'),
            'redirect': self._get_text('redirect_scan'),
            'exposure': self._get_text('exposure_scan'),
            'config': self._get_text('config_scan'),
            'default-login': self._get_text('default_login_scan'),
            'ssl': self._get_text('ssl_scan'),
            'dns': self._get_text('dns_scan'),
            'subdomain-takeover': self._get_text('subdomain_takeover_scan'),
            'tech': self._get_text('tech_scan'),
            'panel': self._get_text('panel_scan'),
        }

    def get_protocol_scans(self, language: str) -> Dict[str, str]:
        """Get Nuclei protocol-based scans with localized descriptions.

        ``language`` is retained for interface compatibility; the lookup uses
        ``self.language``.
        """
        return {
            'http': self._get_text('http_protocol'),
            'dns': self._get_text('dns_protocol'),
            'tcp': self._get_text('tcp_protocol'),
            'ssl': self._get_text('ssl_protocol'),
        }

    async def run_test(
        self, session: BrowserSession, test_config: TestConfiguration, llm_config: Dict[str, Any], target_url: str
    ) -> TestResult:
        """Run Security tests using Nuclei scanning.

        The browser session is unused: Nuclei scans the target independently.

        Args:
            session: Unused; kept for the common runner interface.
            test_config: Test configuration (id, name, type, report/specific config).
            llm_config: Unused; kept for the common runner interface.
            target_url: URL to scan.

        Returns:
            TestResult with one combined sub-test listing every finding and
            overall metrics (counts per severity, heuristic security score).
        """

        self.language = test_config.report_config.get('language', 'zh-CN')
        with Display.display(test_config.test_name):
            result = TestResult(
                test_id=test_config.test_id,
                test_type=test_config.test_type,
                test_name=test_config.test_name,
                status=TestStatus.RUNNING,
                category=get_category_for_test_type(test_config.test_type),
            )

            # FIX: defined before the try so the failure-path cleanup can use it
            # directly instead of the fragile `locals().get('scan_results', {})`.
            scan_results: Dict[str, Any] = {}
            try:
                logging.info(f"{icon['running']} Running test: {test_config.test_name}")

                # Bail out early when the nuclei CLI is not installed.
                nuclei_available = await self._check_nuclei_available()
                if not nuclei_available:
                    result.status = TestStatus.FAILED
                    result.error_message = self._get_text('nuclei_not_found')
                    return result

                # Execute the scans and merge their JSONL output.
                scan_results = await self._run_security_scan(target_url, test_config)
                findings = await self._process_scan_results(scan_results)

                # Classify findings by severity and flatten details for reporting.
                severity_counts: Dict[str, int] = {}
                finding_details: List[Dict[str, Any]] = []
                for finding in findings:
                    severity = finding.get('info', {}).get('severity', 'unknown')
                    severity_counts[severity] = severity_counts.get(severity, 0) + 1
                    finding_details.append(
                        {
                            'template_id': finding.get('template-id', 'unknown'),
                            'name': finding.get('info', {}).get('name', 'Unknown'),
                            'severity': severity,
                            'description': finding.get('info', {}).get('description', ''),
                            'matched_at': finding.get('matched-at', ''),
                            'extracted_results': finding.get('extracted-results', []),
                        }
                    )

                # Build one combined report listing every finding.
                # FIX: removed two loops that built per-severity and per-scan-type
                # sub-tests which were unconditionally discarded when sub_tests was
                # reassigned below — pure dead work with no observable effect.
                combined_reports = []
                if not finding_details:
                    # No security issues found
                    combined_reports.append(SubTestReport(
                        title=self._get_text('security_check'),
                        issues=self._get_text('no_issues_found')
                    ))
                else:
                    for fd in finding_details:
                        title = f"[{fd.get('severity', 'unknown').upper()}] {fd.get('name')}"
                        details_parts = []
                        if fd.get('description'):
                            details_parts.append(fd['description'])
                        if fd.get('matched_at'):
                            details_parts.append(f"{self._get_text('matched_at')}: {fd['matched_at']}")
                        if fd.get('extracted_results'):
                            details_parts.append(f"{self._get_text('extracted')}: {', '.join(map(str, fd['extracted_results']))}")
                        issues_text = ' | '.join(details_parts) if details_parts else self._get_text('no_details')
                        combined_reports.append(SubTestReport(title=title, issues=issues_text))

                sub_tests = [
                    SubTestResult(
                        name=self._get_text('nuclei_check'),
                        status=TestStatus.PASSED,
                        metrics={
                            'total_findings': len(finding_details),
                            **severity_counts
                        },
                        report=combined_reports
                    )
                ]

                result.sub_tests = sub_tests
                # NOTE: the scan itself completing counts as PASSED even when
                # findings exist; findings are surfaced via metrics and report.
                result.status = TestStatus.PASSED

                # Overall metrics, including a simple heuristic security score.
                total_findings = len(findings)
                critical_findings = severity_counts.get('critical', 0)
                high_findings = severity_counts.get('high', 0)

                result.add_metric('total_findings', total_findings)
                result.add_metric('critical_findings', critical_findings)
                result.add_metric('high_findings', high_findings)
                result.add_metric('security_score', max(0, 100 - (critical_findings * 20 + high_findings * 10)))

                # Attach detailed results for downstream consumers.
                result.add_data('security_findings', finding_details)
                result.add_data('severity_summary', severity_counts)

                # Remove the temporary scan output directory.
                await self._cleanup_temp_files(scan_results.get('output_path'))

                logging.info(f"{icon['check']} Test completed: {test_config.test_name}")

            except Exception as e:
                error_msg = f'Security test failed: {str(e)}'
                logging.error(error_msg)
                result.status = TestStatus.FAILED
                result.error_message = error_msg

                # Best-effort cleanup even on failure; never mask the original error.
                # FIX: was a bare `except: pass`.
                try:
                    await self._cleanup_temp_files(scan_results.get('output_path'))
                except Exception as cleanup_err:
                    logging.debug(f'Cleanup after failed security test raised: {cleanup_err}')

            return result

    async def _check_nuclei_available(self) -> bool:
        """Check whether the ``nuclei`` CLI is available on PATH."""
        try:
            process = await asyncio.create_subprocess_exec(
                'nuclei', '-version', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            logging.debug(f'Nuclei check - return code: {process.returncode}')
            logging.debug(f'Nuclei check - stdout: {stdout.decode()}')
            logging.debug(f'Nuclei check - stderr: {stderr.decode()}')
            return process.returncode == 0
        except Exception as e:
            logging.error(f'Error checking nuclei availability: {e}')
            return False

    async def _run_security_scan(self, target_url: str, test_config: TestConfiguration) -> Dict[str, Any]:
        """Run the configured Nuclei scans against ``target_url``.

        Creates a per-test temporary output directory (keyed by test id for
        uniqueness), assembles the scan matrix, and executes it in parallel.

        Returns:
            Dict with 'scan_results' (per-command outcomes) and 'output_path'
            (the temp directory holding the JSONL result files).
        """
        import tempfile

        temp_dir = Path(tempfile.gettempdir()) / 'webqa_agent_security' / test_config.test_id
        temp_dir.mkdir(parents=True, exist_ok=True)

        # Base scan matrix: tag-based and protocol-based scans.
        scan_configs = {'tag': self.get_scan_tags(self.language), 'protocol': self.get_protocol_scans(self.language)}

        # Optional severity-focused scans from test-specific config.
        custom_config = test_config.test_specific_config or {}
        include_severity_scans = custom_config.get('include_severity_scans', True)

        if include_severity_scans:
            scan_configs['severity'] = {
                'critical': self._get_text('critical_vulnerability'),
                'high': self._get_text('high_risk_vulnerability'),
                'medium': self._get_text('medium_risk_vulnerability')
            }

        # Run all scans concurrently.
        scan_results = await self._execute_scan_batch(target_url, scan_configs, temp_dir)

        return {'scan_results': scan_results, 'output_path': str(temp_dir)}

    async def _execute_scan_batch(self, target_url: str, scan_configs: Dict[str, Dict], output_path: Path) -> list:
        """Execute a batch of security scans in parallel.

        Builds one nuclei invocation per (scan_type, scan_name) pair, runs them
        with ``asyncio.gather``, and drops any task that raised.
        """
        tasks = []

        for scan_type, scans in scan_configs.items():
            for scan_name, description in scans.items():
                output_file = output_path / f'{scan_type}_{scan_name}_{int(time.time())}.json'
                task = self._run_nuclei_command(target_url, scan_type, scan_name, output_file)
                tasks.append(task)

        logging.info(f'Start {len(tasks)} security scan tasks...')
        scan_results = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep successful results; log and skip failed tasks.
        results = []
        for result in scan_results:
            if isinstance(result, Exception):
                logging.error(f'Scan task failed: {result}')
                continue
            results.append(result)

        return results

    async def _run_nuclei_command(
        self, target_url: str, scan_type: str, scan_name: str, output_file: Path
    ) -> Dict[str, Any]:
        """Run a single Nuclei scan command and capture its outcome.

        Never raises: subprocess failures are folded into the returned dict
        (returncode 1, stderr set to the exception text).
        """
        cmd = ['nuclei', '-target', target_url, '-json-export', str(output_file), '-silent']

        # Map the scan type onto the corresponding nuclei CLI flag.
        if scan_type == 'tag':
            cmd.extend(['-tags', scan_name])
        elif scan_type == 'protocol':
            cmd.extend(['-type', scan_name])
        elif scan_type == 'severity':
            cmd.extend(['-severity', scan_name])

        try:
            process = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            return {
                'scan_name': scan_name,
                'scan_type': scan_type,
                'stdout': stdout.decode() if stdout else '',
                'stderr': stderr.decode() if stderr else '',
                'returncode': process.returncode,
                'output_file': str(output_file),
            }
        except Exception as e:
            return {
                'scan_name': scan_name,
                'scan_type': scan_type,
                'stdout': '',
                'stderr': str(e),
                'returncode': 1,
                'output_file': str(output_file),
            }

    async def _process_scan_results(self, scan_results: Dict[str, Any]) -> list:
        """Read and merge all scan result files from the output directory.

        Nuclei's -json-export output is JSONL (one JSON object per line), but
        some versions emit a JSON array instead — both forms are handled.
        Unparseable lines and unreadable files are skipped with a log entry.
        """
        all_results = []
        output_path = Path(scan_results['output_path'])
        json_files = list(output_path.glob('*.json'))

        for json_file in json_files:
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    content = f.read().strip()
                    if content:
                        # JSONL: one JSON object per line.
                        for line in content.split('\n'):
                            if line.strip():
                                try:
                                    result = json.loads(line)
                                    if isinstance(result, dict):
                                        all_results.append(result)
                                    elif isinstance(result, list):
                                        for item in result:
                                            if isinstance(item, dict):
                                                all_results.append(item)
                                except json.JSONDecodeError:
                                    continue
            except Exception as e:
                logging.error(f'Failed to read result file {json_file}: {e}')

        return all_results

    async def _cleanup_temp_files(self, temp_path: str):
        """Remove the temporary scan output directory, if any.

        Best-effort: failures are logged as warnings and never raised.
        """
        if not temp_path:
            return

        try:
            import shutil

            temp_dir = Path(temp_path)
            if temp_dir.exists() and temp_dir.is_dir():
                shutil.rmtree(temp_dir)
                logging.debug(f'Cleaned up temporary security scan files: {temp_path}')
        except Exception as e:
            logging.warning(f'Failed to cleanup temporary files at {temp_path}: {e}')
webqa_agent/llm/llm_api.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import httpx
4
+ from openai import AsyncOpenAI
5
+
6
+
7
class LLMAPI:
    """Async wrapper around an OpenAI-compatible chat-completions endpoint.

    The client is built lazily from a plain configuration dict.  Expected
    config keys: ``api`` (only ``"openai"`` is supported), ``model``,
    ``api_key``, optional ``base_url``, and optional sampling defaults
    ``temperature`` / ``top_p``.
    """

    def __init__(self, llm_config) -> None:
        """Store the configuration; no network clients are created here.

        Args:
            llm_config: Configuration mapping (see class docstring).
        """
        self.llm_config = llm_config
        self.api_type = self.llm_config.get("api")
        self.model = self.llm_config.get("model")
        self.client = None  # AsyncOpenAI client, created lazily in initialize()
        self._client = None  # shared httpx client, created lazily in _get_client()

    async def initialize(self):
        """Create the AsyncOpenAI client from config.

        Returns:
            self, to allow ``api = await LLMAPI(cfg).initialize()`` chaining.

        Raises:
            ValueError: If the API key is missing or ``api`` is unsupported.
        """
        if self.api_type == "openai":
            self.api_key = self.llm_config.get("api_key")
            if not self.api_key:
                raise ValueError("API key is empty. OpenAI client not initialized.")
            self.base_url = self.llm_config.get("base_url")
            # Use AsyncOpenAI client for async operations.
            self.client = (
                AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60)
                if self.base_url
                else AsyncOpenAI(api_key=self.api_key, timeout=60)
            )
            # SECURITY: never write the raw API key to the log; show a masked suffix only.
            masked_key = f"***{self.api_key[-4:]}" if len(self.api_key) > 4 else "***"
            logging.debug(
                f"AsyncOpenAI client initialized with API key: {masked_key}, "
                f"Model: {self.model} and base URL: {self.base_url}"
            )
        else:
            raise ValueError("Invalid API type or missing credentials. LLM client not initialized.")

        return self

    async def _get_client(self):
        """Return the shared httpx.AsyncClient, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
        return self._client

    async def get_llm_response(self, system_prompt, prompt, images=None, temperature=None, top_p=None):
        """Send a prompt (optionally with images) to the LLM and return the cleaned text reply.

        Args:
            system_prompt: System-role instruction text.
            prompt: User-role prompt text.
            images: Optional base64 data-URL string or list of such strings.
            temperature: Optional override; falls back to config (default 0.1).
            top_p: Optional override; falls back to config (default None).

        Returns:
            The model's response text with any ``` fences stripped.

        Raises:
            Exception: Re-raises any error from message building or the API call.
        """
        # Lazily initialize so callers may skip an explicit initialize().
        if self.api_type == "openai" and self.client is None:
            await self.initialize()

        try:
            messages = self._create_messages(system_prompt, prompt)
            # Attach images (if any) to the user message.
            if images and self.api_type == "openai":
                self._handle_images_openai(messages, images)
            if self.api_type == "openai":
                # Resolve sampling params: prefer method args, fall back to
                # config defaults (default temperature=0.1).
                resolved_temperature = (
                    temperature if temperature is not None else self.llm_config.get("temperature", 0.1)
                )
                resolved_top_p = top_p if top_p is not None else self.llm_config.get("top_p", None)
                logging.debug(f"Resolved temperature: {resolved_temperature}, top_p: {resolved_top_p}")
                result = await self._call_openai(messages, resolved_temperature, resolved_top_p)

            return result
        except Exception as e:
            logging.error(f"LLMAPI.get_llm_response encountered error: {e}")
            raise

    def _create_messages(self, system_prompt, prompt):
        """Build the two-message (system + user) chat payload.

        Raises:
            ValueError: If the configured api_type is not ``"openai"``.
        """
        if self.api_type == "openai":
            return [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": [{"type": "text", "text": prompt}]},
            ]
        else:
            raise ValueError("Invalid api_type. Choose 'openai'.")

    def _handle_images_openai(self, messages, images):
        """Append image attachments to the user message (OpenAI format).

        Accepts a single base64 data-URL string or a list of them; a single
        string is only attached when it starts with ``data:image``.

        Raises:
            ValueError: If ``images`` is neither a string nor a list, or
                attaching fails for any other reason.
        """
        try:
            if isinstance(images, str):
                if images.startswith("data:image"):
                    image_message = {"type": "image_url", "image_url": {"url": f"{images}", "detail": "low"}}
                    messages[1]["content"].append(image_message)
            elif isinstance(images, list):
                for image_base64 in images:
                    image_message = {"type": "image_url", "image_url": {"url": f"{image_base64}", "detail": "low"}}
                    messages[1]["content"].append(image_message)
            else:
                raise ValueError("Invalid type for 'images'. Expected a base64 string or a list of base64 strings.")
        except Exception as e:
            logging.error(f"Error while handling images for OpenAI: {e}")
            raise ValueError(f"Failed to process images for OpenAI. Error: {e}")

    async def _call_openai(self, messages, temperature=None, top_p=None):
        """Invoke the chat-completions endpoint and return the cleaned content.

        Raises:
            ValueError: Wrapping any error raised by the underlying API call.
        """
        try:
            create_kwargs = {
                "model": self.llm_config.get("model"),
                "messages": messages,
                "timeout": 60,
            }
            # Only forward sampling params that were actually resolved upstream.
            if temperature is not None:
                create_kwargs["temperature"] = temperature
            if top_p is not None:
                create_kwargs["top_p"] = top_p

            completion = await self.client.chat.completions.create(**create_kwargs)
            content = completion.choices[0].message.content
            logging.debug(f"LLM API response: {content}")
            # Strip Markdown code fences the model may wrap JSON in.
            content = self._clean_response(content)
            return content
        except Exception as e:
            logging.error(f"Error while calling LLM API: {e}")
            raise ValueError(f"{str(e)}")

    def _clean_response(self, response):
        """Remove surrounding ```json / ``` code-fence markers, if present.

        Returns the input unchanged when it is not a fenced string; on any
        unexpected error, logs and returns the original response.
        """
        try:
            if response and isinstance(response, str):
                # ```json ... ``` fences take priority over bare ``` fences.
                if response.startswith("```json") and response.endswith("```"):
                    logging.debug("Cleaning response: Removing ```json``` markers")
                    return response[7:-3].strip()
                elif response.startswith("```") and response.endswith("```"):
                    logging.debug("Cleaning response: Removing ``` markers")
                    return response[3:-3].strip()

            # No fences found: return as-is.  (The previous
            # encode('utf-8').decode('utf-8') round-trip was a no-op and
            # has been removed.)
            return response
        except Exception as e:
            logging.error(f"Error while cleaning response: {e}")
            logging.error(f"Original response: {response}")
            return response

    async def close(self):
        """Close and discard the shared httpx client, if one was created."""
        if self._client:
            await self._client.aclose()
            self._client = None
webqa_agent/llm/prompt.py ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Portions of the `planner_system_prompt` and `planner_output_prompt`
2
+ # variations in this file are derived from:
3
+ # https://github.com/web-infra-dev/midscene/packages/core/src/ai-model/prompt/llm-planning.ts
4
+ #
5
+ # Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
6
+ #
7
+ # Licensed under the MIT License
8
+
9
+
10
+ class LLMPrompt:
11
+ planner_system_prompt = """
12
+ ## Role
13
+ You are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.
14
+
15
+ ## Context Provided
16
+ - **`pageDescription (interactive elements)`**: A map of all interactive elements on the page, each with a unique ID. Use these IDs for actions.
17
+ - **`page_structure (full text content)`**: The complete text content of the page, including non-interactive elements.
18
+ - **`Screenshot`**: A visual capture of the current page state.
19
+
20
+ ## Objective
21
+ - Decompose the user's instruction into a **series of actionable steps**, each representing a single UI interaction.
22
+ - **Unified Context Analysis**: You MUST analyze BOTH `pageDescription` and `page_structure` together. Use `page_structure` to understand the meaning and context of the interactive elements in `pageDescription` (e.g., matching a label to a nearby input field). This unified view is critical for making correct decisions.
23
+ - Identify and locate the target element if applicable.
24
+ - Validate if the planned target matches the user's intent, especially in cases of **duplicate or ambiguous elements**.
25
+ - Avoid redundant operations such as repeated scrolling or re-executing completed steps.
26
+ - If the instruction cannot be fully completed, provide a precise `furtherPlan`.
27
+
28
+ ## Target Identification & Validation
29
+ ### Step-by-step validation:
30
+ 1. **Extract User Target**
31
+ - From the instruction, extract the label/description of the intended target.
32
+
33
+ 2. **Locate Candidate Elements**
34
+ - Match label/text from visible elements.
35
+ - If **duplicates exist**, apply **anchor-based spatial disambiguation**:
36
+ - Use anchor labels, coordinates, and direction (below/above/left/right).
37
+ - For 'below', validate:
38
+ - target.x ≈ anchor.x ±30 pixels
39
+ - target.y > anchor.y
40
+ - Sort by ascending y to get N-th below.
41
+
42
+ 3. **Final Validation**
43
+ - Ensure the selected target aligns with user's intent.
44
+ - If validation fails, return:
45
+ `"Planned element does not match the user's expected target."`
46
+
47
+ 4. **Thought Requirement (Per Action)**
48
+ - Explain how the element was selected.
49
+ - Confirm its match with user intent.
50
+ - Describe how ambiguity was resolved.
51
+
52
+ ## Anchor Usage Rule
53
+ Anchors are strictly used for reference during disambiguation.
54
+ **NEVER** interact (Tap/Hover/Check) with anchor elements directly.
55
+
56
+ ## Scroll Behavior Constraints
57
+ - Avoid planning `Scroll` if the page is already at the bottom.
58
+ - Check prior actions (`WhatHaveBeenDone`) for any `Scroll untilBottom`. If present, treat the page as already scrolled.
59
+ - If still unable to locate a required element, return:
60
+ `"Validation Failed"` instead of re-scrolling.
61
+
62
+ ## Spatial Direction Definitions
63
+ Relative to page layout:
64
+ - 'Above': visually higher than anchor.
65
+ - 'Below': vertically under anchor, x ≈ anchor.x ±30px, y > anchor.y
66
+ - 'Left' / 'Right': horizontally beside anchor.
67
+
68
+ Use top-down, left-right search order. Default to top-bottom if uncertain.
69
+
70
+ ## Workflow
71
+ 1. Receive user's instruction, screenshot, and task state.
72
+ 2. Decompose into sequential steps under `actions`.
73
+ 3. For each action:
74
+ - If the element is visible, provide `locate` details.
75
+ - If not visible, halt further planning, set `taskWillBeAccomplished` = false, and describe next steps via `furtherPlan`.
76
+
77
+ 4. If task is completed with current steps, set `taskWillBeAccomplished` = true.
78
+ 5. Use `furtherPlan` when the task is partially completed.
79
+
80
+ ## Constraints
81
+ - **No redundant scrolls**. If bottom is reached, don't scroll again.
82
+ - **Trust prior actions** (`WhatHaveBeenDone`). Do not repeat.
83
+ - All plans must reflect actual context in screenshot.
84
+ - Always output strict **valid JSON**. No comments or markdown.
85
+
86
+ ## Actions
87
+
88
+ Each action includes `type` and `param`, optionally with `locate`.
89
+
90
+ Each action has a
91
+ - type: 'Tap', tap the located element
92
+ * {{ locate: {{ id: string }}, param: null }}
93
+ - type: 'Hover', move mouse over to the located element
94
+ * {{ locate: {{ id: string }}, param: null }}
95
+ - type: 'Input', replace the value in the input field
96
+ * {{ locate: {{ id: string }}, param: {{ value: string, clear_before_type: boolean (optional) }} }}
97
+ * `value` is the final required input value based on the existing input. No matter what modifications are required, just provide the final value to replace the existing input value.
98
+ * For Input actions, if the page or validation message requires a minimum length, the value you generate MUST strictly meet or exceed this length. For Chinese, count each character as 1.
99
+ * `clear_before_type`: Set to `true` if the instruction explicitly says to 'clear' the field before typing, or if you are correcting a previous failed input. Defaults to `false`.
100
+ - type: 'KeyboardPress', press a key
101
+ * {{ param: {{ value: string }} }}
102
+ - type: 'Upload', upload a file (or click the upload button)
103
+ * {{ locate: {{ id: string }}, param: null }}
104
+ * use this action when the instruction is a "upload" statement. locate the input element to upload the file.
105
+ - type: 'Scroll', scroll up or down.
106
+ * {{
107
+ locate: {{ id: string }} | null,
108
+ param: {{
109
+ direction: 'down'(default) | 'up',
110
+ scrollType: 'once' (default) | 'untilBottom' | 'untilTop',
111
+ distance: null | number
112
+ }}
113
+ }}
114
+ * To scroll some specific element, put the element at the center of the region in the `locate` field. If it's a page scroll, put `null` in the `locate` field.
115
+ * `param` is required in this action. If some fields are not specified, use direction `down`, `once` scroll type, and `null` distance.
116
+ - type: 'GetNewPage', get the new page
117
+ * {{ param: null }}
118
+ * use this action when the instruction is a "get new page" statement or "open in new tab" or "open in new window".
119
+ - type: 'GoToPage', navigate directly to a specific URL
120
+ * {{ param: {{ url: string }} }}
121
+ * use this action when you need to navigate to a specific web page URL, useful for returning to homepage or navigating to known pages.
122
+ - type: 'GoBack', navigate back to the previous page
123
+ * {{ param: null }}
124
+ * use this action when you need to go back to the previous page in the browser history, similar to clicking the browser's back button.
125
+ - type: 'Sleep'
126
+ * {{ param: {{ timeMs: number }} }}
127
+ - type: 'Check'
128
+ * {{ param: null }}
129
+ * use this action when the instruction is a "check" or "verify" or "validate" statement.
130
+ - type: 'Drag', drag an slider or element from source to target position
131
+ For Drag action, use the following format:
132
+ {
133
+ "type": "Drag",
134
+ "thought": "Describe why and how you drag, e.g. Drag the slider from value 0 to 50.",
135
+ "param": {
136
+ "sourceCoordinates": { "x": number, "y": number },
137
+ "targetCoordinates": { "x": number, "y": number },
138
+ "dragType": "coordinate"
139
+ },
140
+ "locate": { "id": string } | null
141
+ }
142
+ - dragType: always use "coordinate"
143
+ - Both sourceCoordinates and targetCoordinates must be provided and must be positive numbers.
144
+ - If coordinates are missing or invalid, the action will fail.
145
+ - type: 'SelectDropdown'
146
+ * {{ locate: {{ dropdown_id: int, option_id: int (optional) }}, param: {{ selection_path: string | list }} }}
147
+ * use this action when the instruction is a "select" or "choose" or "pick" statement. *you should click the dropdown element first.*
148
+ * dropdown_id is the id of the dropdown container element.
149
+ * option_id is the id of the option element in the expanded dropdown (if available).
150
+ * if option_id is provided, you should directly click the option element.
151
+ * if option_id is not provided, use dropdown_id to expand and select by text.
152
+ * selection_path is the text of the option to be selected.
153
+ * if the selection_path is a string, it means the option is the first level of the dropdown.
154
+ * if the selection_path is a list, it means the option is the nth level of the dropdown.
155
+
156
+ ## Further Plan Format
157
+ If the task isn't completed:
158
+ "furtherPlan": {
159
+ "whatHaveDone": "Actions already performed...",
160
+ "whatToDoNext": "Next steps to reach target..."
161
+ }
162
+ ```
163
+ """
164
+
165
+ planner_output_prompt = """
166
+ ## First, you need to analyze the page dom tree and the screenshot, and complete the test steps.
167
+
168
+ ### Element Identification Instructions:
169
+ In the pageDescription, you will find elements with the following structure:
170
+ - Each element has an external id (like '1', '2', '3') for easy reference
171
+ - Each element also has an internal id (like 917, 920, 923) which is the actual DOM element identifier
172
+ - When creating actions, use the external id (string) in the locate field
173
+ - Example: if you see element '1' with internal id 917, use "id": "1" in your action
174
+
175
+ ### Contextual Decision Making:
176
+ - **Crucially, use the `page_structure` (full text content) to understand the context of the interactive elements from `pageDescription`**. For example, if `page_structure` shows "Username:" next to an input field, you know that input field is for the username.
177
+ - If you see error text like "Invalid email format" in `page_structure`, use this information to correct your next action.
178
+
179
+ ### Supported Actions:
180
+ - Tap: Click on a specified page element (such as a button or link). Typically used to trigger a click event.
181
+ - Scroll: Scroll the page or a specific region. You can specify the direction (down, up), the scroll distance, or scroll to the edge of the page/region.
182
+ - Input: Enter text into an input field or textarea. This action will replace the current value with the specified final value.
183
+ - Sleep: Wait for a specified amount of time (in milliseconds). Useful for waiting for page loads or asynchronous content to render.
184
+ - Upload: Upload a file
185
+ - KeyboardPress: Simulate a keyboard key press, such as Enter, Tab, or arrow keys.
186
+ - Drag: Perform a drag-and-drop operation. Moves the mouse from a starting coordinate to a target coordinate, often used for sliders, sorting, or drag-and-drop interfaces. Requires both source and target coordinates.
187
+ - SelectDropdown: Select an option from a dropdown menu which is user's expected option. The dropdown element is the first level of the dropdown menu. IF You can see the dropdown element, you cannot click the dropdown element, you should directly select the option.
188
+
189
+ Please ensure the output is a valid **JSON** object. Do **not** include any markdown, backticks, or code block indicators.
190
+
191
+ ### Output **JSON Schema**, **Legal JSON format**:
192
+ {
193
+ "actions": [
194
+ {
195
+ "thought": "Reasoning for this action and why it's feasible on the current page.",
196
+ "type": "Tap" | "Scroll" | "Input" | "Sleep" | "Check" | "Upload" | "KeyboardPress" | "Drag" | "SelectDropdown" | "GoToPage" | "GoBack",
197
+ "param": {...} | null,
198
+ "locate": {...} | null
199
+ }
200
+ ],
201
+ "taskWillBeAccomplished": true | false,
202
+ "targetVerified": true | false, // optional, include if task involves target validation
203
+ "furtherPlan": {
204
+ "whatHaveDone": string,
205
+ "whatToDoNext": string
206
+ } | null,
207
+ "error": string | null // present only if planning failed or validation failed
208
+ }
209
+
210
+ ---
211
+
212
+ ### Output Requirements
213
+ - Use `thought` field in every action to explain selection & feasibility.
214
+ - If the task involves matching a user-described target (like "click send button"), you **must validate the target**:
215
+ - If matched: `targetVerified: true`
216
+ - If mismatched: `targetVerified: false` and include error: "Planned element does not match the user's expected target"
217
+ - If an expected element is not found on the page:
218
+ - For imperative instruction: return `error` and empty actions.
219
+ - For tolerant instructions like "If popup exists, close it", return `FalsyConditionStatement` action.
220
+
221
+ ---
222
+
223
+ ### Unified Few-shot Examples
224
+
225
+ #### Example 1: Tap + Sleep + Check (task incomplete)
226
+ "Click send button and wait 50s"
227
+
228
+ ====================
229
+ {pageDescription}
230
+ ====================
231
+
232
+ By viewing the page screenshot and description, you should consider this and output the JSON:
233
+
234
+ ```json
235
+ {
236
+ "actions": [
237
+ {
238
+ "type": "Tap",
239
+ "thought": "Click the send button to trigger response",
240
+ "param": null,
241
+ "locate": { "id": "1" }
242
+ },
243
+ {
244
+ "type": "Sleep",
245
+ "thought": "Wait for 50 seconds for streaming to complete",
246
+ "param": { "timeMs": 50000 }
247
+ }
248
+ ],
249
+ "taskWillBeAccomplished": false,
250
+ "targetVerified": true,
251
+ "furtherPlan": {
252
+ "whatHaveDone": "Clicked send and waited 50 seconds",
253
+ "whatToDoNext": "Verify streaming output is complete"
254
+ },
255
+ "error": null
256
+ }
257
+ ```
258
+
259
+ #### Example 2: Scroll + Check (scroll history aware)
260
+ ```json
261
+ {
262
+ "actions": [
263
+ {
264
+ "type": "Scroll",
265
+ "thought": "Scroll to bottom to reveal more datasets",
266
+ "param": { "direction": "down", "scrollType": "untilBottom", "distance": null },
267
+ "locate": null
268
+ }
269
+ ],
270
+ "taskWillBeAccomplished": false,
271
+ "furtherPlan": {
272
+ "whatHaveDone": "Scrolled to bottom of page",
273
+ "whatToDoNext": "Check whether only Strong Reasoning datasets are shown"
274
+ },
275
+ "error": null
276
+ }
277
+ ```
278
+
279
+ #### Example 3: 点击首页button,校验跳转新开页
280
+ "Click the button on the homepage and verify that a new page opens"
281
+ ```json
282
+ {
283
+ "actions": [
284
+ {
285
+ "type": "Tap",
286
+ "thought": "Click the button on the homepage",
287
+ "param": null,
288
+ "locate": { "id": "1" }
289
+ },
290
+ {
291
+ "type": "GetNewPage",
292
+ "thought": "I get the new page",
293
+ "param": null
294
+ }
295
+ ],
296
+ "taskWillBeAccomplished": true,
297
+ "furtherPlan": null,
298
+ "error": null
299
+ }
300
+ ```
301
+
302
+ #### Example 4: 上传文件'example.pdf',等待10s
303
+ "Upload a file and then wait"
304
+ ```json
305
+ {
306
+ "actions": [
307
+ {
308
+ "locate": {
309
+ "id": "41"
310
+ },
311
+ "param": null,
312
+ "thought": "Tap on the area that allows file uploads, as it's currently visible and interactive.",
313
+ "type": "Upload"
314
+ },
315
+ {
316
+ "param": {
317
+ "timeMs": 10000
318
+ },
319
+ "thought": "Wait for 10 seconds to allow the upload to complete.",
320
+ "type": "Sleep"
321
+ }
322
+ ],
323
+ "error": null,
324
+ "furtherPlan": null,
325
+ "targetVerified": true,
326
+ "taskWillBeAccomplished": true
327
+ }
328
+ ```
329
+
330
+ #### Example: Drag slider
331
+ ```json
332
+ {
333
+ "actions": [
334
+ {
335
+ "type": "Drag",
336
+ "thought": "currently set at value 0. To change it to 50, we perform a drag action. Calculated target x for 50 degrees is approximately 300( Give specific calculation formulas ), so drag the slider to 50 by moving from (100, 200) to (300, 200).",
337
+ "param": {
338
+ "sourceCoordinates": { "x": 100, "y": 200 },
339
+ "targetCoordinates": { "x": 300, "y": 200 },
340
+ "dragType": "coordinate"
341
+ },
342
+ "locate": { "id": "1" }
343
+ }
344
+ ],
345
+ "taskWillBeAccomplished": true,
346
+ "furtherPlan": null,
347
+ "error": null
348
+ }
349
+ ```
350
+
351
+ #### Example 5: click AND Select
352
+ "click the select button and select the option 'Option 2' from the dropdown menu and then select the option 'Option 3' from the dropdown menu"
353
+ ATTENTION: dropdown_id is the id of the dropdown container element. option_id is the id of the option element in the expanded dropdown (if available).
354
+ ```json
355
+ {
356
+ "actions": [
357
+ {
358
+ "type": "Tap",
359
+ "thought": "Click the select button which id is 5",
360
+ "param": null,
361
+ "locate": { "id": "5" }
362
+ },
363
+ {
364
+ "type": "SelectDropdown",
365
+ "thought": "there is select dropdown id is "5", Select the option 'Option 2' from the dropdown menu and then select the option 'Option 3' from the dropdown menu",
366
+ "param": { "selection_path": ["Option 2", "Option 3"] },
367
+ "locate": { dropdown_id: "5", option_id: "2" (optional) }
368
+ }
369
+ ],
370
+ "taskWillBeAccomplished": true,
371
+ "furtherPlan": null,
372
+ "error": null
373
+ }
374
+ ```
375
+
376
+ #### Example 6: Navigate to Homepage using GoToPage
377
+ \"Go to the homepage to restart the test\"
378
+ ```json
379
+ {
380
+ \"actions\": [
381
+ {
382
+ \"type\": \"GoToPage\",
383
+ \"thought\": \"Navigate to homepage to restart the test from a clean state\",
384
+ \"param\": { \"url\": \"https://example.com\" },
385
+ \"locate\": null
386
+ }
387
+ ],
388
+ \"taskWillBeAccomplished\": true,
389
+ \"furtherPlan\": null,
390
+ \"error\": null
391
+ }
392
+ ```
393
+
394
+ #### Example 7: Go Back to Previous Page
395
+ \"Go back to the previous page and try again\"
396
+ ```json
397
+ {
398
+ \"actions\": [
399
+ {
400
+ \"type\": \"GoBack\",
401
+ \"thought\": \"Return to previous page to retry the operation\",
402
+ \"param\": null,
403
+ \"locate\": null
404
+ }
405
+ ],
406
+ \"taskWillBeAccomplished\": false,
407
+ \"furtherPlan\": {
408
+ \"whatHaveDone\": \"Navigated back to previous page\",
409
+ \"whatToDoNext\": \"Retry the failed action from the previous page\"
410
+ },
411
+ \"error\": null
412
+ }
413
+ ```
414
+
415
+ #### Example of what NOT to do
416
+ - If the action's `locate` is null and element is **not in the screenshot**, don't continue planning. Instead:
417
+ ```json
418
+ {
419
+ "actions": [],
420
+ "taskWillBeAccomplished": false,
421
+ "furtherPlan": {
422
+ "whatHaveDone": "Clicked language switch",
423
+ "whatToDoNext": "Locate and click English option once it's visible"
424
+ },
425
+ "error": "Planned element not visible; task cannot be completed on current page"
426
+ }
427
+ ```
428
+
429
+ ---
430
+
431
+ ### Final Notes
432
+ - Plan only for **visible, reachable actions** based on current context.
433
+ - If not all steps can be completed now, push remainder to `furtherPlan`.
434
+ - Always output strict JSON format — no markdown, no commentary.
435
+ - Remember to use the external id (string) from the pageDescription in your locate field.
436
+
437
+ """
438
+
439
+ verification_prompt = """
440
+ Task instructions: Based on the assertion provided by the user, you need to check final screenshot to determine whether the verification assertion has been completed.
441
+
442
+ First, you need to understand the user's assertion, and then determine the elements that need to be verified.
443
+ Second, you need to check Page Structure and the Marker screenshot to determine whether the elements can be determined.
444
+ Third, you will give a conclusion based on the screenshot and the assertion.
445
+
446
+ ### Few-shot Examples
447
+
448
+ #### Example 1: The assertions provided by the user involve the visible or invisible elements as a basis for judgment.
449
+ the user's assertions: "Verify that InternThinker Streaming Output Completion, if "stop generating" is not visible, it means the test is passed; if conversation is visible, it means the test is passed.
450
+ ====================
451
+ {pageStructure}
452
+ ====================
453
+ 1. **Step 1 - Determine the "Stop generating" button**: - Check whether there is a button marked "Stop generating" on the page. - If the button does not exist (i.e., it is not visible), this step is considered to be completed correctly.
454
+ 2. **Step 2 - Verify the existence of text information**: - Confirm whether there is a dialog box(that communicates information to the user and prompts them for a response) displayed on the current interface. - Also check whether any text information is output to the screen (i.e., conversation is visible), this step is considered to be completed correctly.
455
+
456
+ Only when both the existence of dialog boxes and text information are met can the entire test process be considered successful.
457
+
458
+
459
+ #### Example 2: Page Navigation & Filter Result Validation
460
+ 1. **Step 1**: Check if the expected content (e.g., search result, category filter result, dataset name) is **already visible**.
461
+ 2. **Step 2**: If not, you may **perform at most one scroll** (e.g., `Scroll: untilBottom`).
462
+ 3. **Step 3**: Recheck whether the expected content is now visible.
463
+ - If found: return `"Validation Passed"`
464
+ - If not found: return `"Validation Failed"`
465
+
466
+ > Never scroll more than once. Do **not** assume infinite content. Always default to visibility-based validation.
467
+
468
+ #### Example 3: Element Presence Verification
469
+ the user's assertions: "Verify X is shown"
470
+ ====================
471
+ {pageStructure}
472
+ ====================
473
+ 1. If user instruction specifies checking for an element:
474
+ - Scan visible UI for that element or its textual representation
475
+ - If visible: Passed
476
+ - If not found and no evidence of error: Failed
477
+
478
+ ---------------
479
+ ### Output Format (Strict JSON):
480
+
481
+ Please first explain your **step-by-step reasoning process** in a `"Reasoning"` field, then provide the final validation result and step-wise details in the format below.
482
+
483
+ Return a single JSON object:
484
+
485
+ For passed validation:
486
+ {
487
+ "Validation Result": "Validation Passed",
488
+ "Details": [
489
+ "Step X: <specific reason for PASS>",
490
+ ...
491
+ ]
492
+ }
493
+
494
+ For failed validation:
495
+ {
496
+ "Validation Result": "Validation Failed",
497
+ "Details": [
498
+ "Step X: <specific reason for Failure>",
499
+ ...
500
+ ]
501
+ }
502
+
503
+ """
504
+
505
+ verification_system_prompt = """
506
+ ## Role
507
+ Think of yourself as a premium model( ChatGPT Plus )
508
+ You are a web automation testing verification expert. Verify whether the current page meets the user's test cases and determine if the task is completed. Ensure that the output JSON format does not include any code blocks or backticks.
509
+ Based on the screenshot and available evidence, determine whether the user has successfully completed the test case.
510
+ Focus exclusively on verifying the completion of the final output rendering.
511
+
512
+ ## Notes:
513
+
514
+ 1. Carefully review each **screenshot** to understand the operation steps and their sequence.
515
+ 2. **Page Structure** is the Dom tree of the page, including the text information of the page.
516
+ 2. Compare the difference between the last screenshot (i.e. the final execution result) with the Page Structure and the target state described by the user.
517
+ 3. Use the following template to give a conclusion: "Based on the analysis of the screenshots you provided, [If consistent, fill in 'Your operation has successfully achieved the expected goal'] [If inconsistent, fill in 'It seems that some steps are not completed/there are deviations, please check... part']."
518
+ 4. If any mismatches are found or further suggestions are needed, provide specific guidance or suggestions to help users achieve their goals.
519
+ 5. Make sure the feedback is concise and clear, and directly evaluate the content submitted by the user.
520
+
521
+ """
522
+
523
+ # New: Test case generation prompts
524
+ case_generator_system_prompt = """
525
+ ## Role
526
+ You are an expert UI test case generator. Your task is to analyze a webpage and user requirements, then generate comprehensive test cases that thoroughly validate the functionality.
527
+
528
+ ## Objective
529
+ Based on the provided webpage HTML/structure and user requirements, you need to:
530
+ 1. **Understand the webpage structure** and identify key interactive elements
531
+ 2. **Analyze user requirements** to understand what functionality needs to be tested
532
+ 3. **Generate comprehensive test steps** that cover the main user workflow
533
+ 4. **Include appropriate validations** to ensure the functionality works correctly
534
+ 5. **Consider edge cases** and error scenarios when applicable
535
+
536
+ ## Test Case Structure
537
+ Each test case should include:
538
+ - **name**: A descriptive name for the test case
539
+ - **steps**: A list of actions and validations
540
+ - **objective**: What the test case aims to validate
541
+
542
+ ## Available Action Types
543
+ - **action**: Execute an action instruction (click, type, scroll, wait, drag, upload, keyboardPress etc.)
544
+ - **verify**: Verify expected outcomes or states
545
+
546
+ ## Guidelines
547
+ 1. **Logical Flow**: Ensure test steps follow a logical user workflow
548
+ 2. **Comprehensive Coverage**: Test main functionality, edge cases, and error scenarios
549
+ 3. **Clear Validations**: Each test should include proper assertions to verify success
550
+ 4. **Realistic User Behavior**: Steps should mimic real user interactions
551
+ 5. **Wait Times**: Include appropriate wait times for dynamic content
552
+ 6. **File Uploads**: When testing file upload, use appropriate file paths and wait times
553
+ 7. **Navigation**: Test page navigation and state changes
554
+ 8. **Error Handling**: Include tests for error scenarios when applicable
555
+
556
+ ## Test Case Categories to Consider
557
+ - **Core Functionality**: Main features and workflows
558
+ - **User Interaction**: Form submissions, button clicks, navigation
559
+ - **Data Validation**: Input validation, error messages
560
+ - **Dynamic Content**: Loading states, real-time updates
561
+ - **File Operations**: Upload, download, preview
562
+ - **Responsive Behavior**: Different screen sizes and devices
563
+ - **Error Scenarios**: Invalid inputs, network issues, permission errors
564
+
565
+ ## Output Format
566
+ Return a JSON object with the following structure:
567
+ ```json
568
+ {
569
+ "test_cases": [
570
+ {
571
+ "name": "descriptive_test_name",
572
+ "objective": "what this test validates",
573
+ "steps": [
574
+ {"action": "action instruction"},
575
+ {"verify": "validation instruction"},
576
+ ...
577
+ ]
578
+ }
579
+ ]
580
+ }
581
+ ```
582
+ """
583
+
584
+ case_generator_output_prompt = """
585
+ ## Task: Generate Comprehensive Test Cases
586
+
587
+ Based on the provided webpage structure and user requirements, generate detailed test cases that thoroughly validate the functionality.
588
+
589
+ ### Webpage Analysis
590
+ Please analyze the page structure and identify:
591
+ 1. **Interactive Elements**: buttons, forms, links, inputs, etc.
592
+ 2. **Key Features**: main functionalities exposed by the UI
593
+ 3. **User Workflows**: typical user journeys through the interface
594
+ 4. **Validation Points**: where success/failure can be measured
595
+
596
+ ### Test Case Generation Rules
597
+ 1. **Start with Basic Flow**: Begin with the most common user workflow
598
+ 2. **Add Edge Cases**: Include boundary conditions and error scenarios
599
+ 3. **Include Proper Waits**: Add appropriate wait times for dynamic content
600
+ 4. **Validate Each Step**: Include assertions to verify expected outcomes
601
+ 5. **Use Realistic Data**: Include realistic test data and file paths
602
+ 6. **Consider User Experience**: Test from an end-user perspective
603
+
604
+ Generate comprehensive test cases in the specified JSON format. **Do not include code blocks in the output**
605
+ """
606
+
607
+ page_default_prompt = """
608
+ You are a web content quality inspector. You need to carefully read the text content of the webpage and complete the task based on the user's test objective. Please ensure that the output JSON format does not contain any code blocks or backticks.
609
+ """
610
+ # You are a web content quality inspector. You need to carefully read the text content of the webpage and complete the task based on the user's test objective. Please ensure that the output JSON format does not contain any code blocks or backticks.
611
+
612
+ TEXT_USER_CASES = [
613
+ """Carefully inspect the text on the current page and identify any spelling, grammar, or character errors.
614
+ Text Accuracy: Spelling errors, grammatical errors, punctuation errors; inconsistent formatting of numbers, units, and currency.
615
+ Wording & Tone: Consistent wording; consistent terminology and abbreviations; consistent tone of voice with the product.
616
+ Language Consistency: Inappropriate mixing of languages within the page (e.g., mixing Chinese and English without spacing).
617
+
618
+ Notes:
619
+ - First, verify whether the page content is readable by the user
620
+ - List all spelling mistakes and grammatical errors separately
621
+ - For each error, provide:
622
+ * Location in the text
623
+ * Current incorrect form
624
+ * Suggested correction
625
+ * Type of error (spelling/grammar/punctuation)
626
+ """
627
+ ]
628
+ CONTENT_USER_CASES = [
629
+ """Rigorously review each screenshot at the current viewport for layout issues, and provide specific, actionable recommendations.
630
+
631
+ [Checklist]
632
+ 1. Text alignment: Misaligned headings/paragraphs/lists; inconsistent margins or baselines
633
+ 2. Spacing: Intra- and inter-component spacing too large/too small/uneven; inconsistent spacing in lists or card grids
634
+ 3. Obstruction & overflow: Text/buttons obscured; content overflowing containers causing truncation, awkward wrapping, or unintended ellipses; sticky header/footer covering content; incorrect z-index stacking
635
+ 4. Responsive breakpoints: Broken layout at current width; wrong column count; unexpected line wraps; horizontal scrollbar appearing/disappearing incorrectly
636
+ 5. Visual hierarchy: Important information not prominent; hierarchy confusion; insufficient contrast between headings and content; font size/weight/color not reflecting hierarchy
637
+ 6. Consistency: Uneven card heights breaking grid rhythm; inconsistent button styles/sizes; misaligned keylines
638
+ 7. Readability: Insufficient contrast; font too small; improper line-height; poor paragraph spacing; long words/URLs not breaking and causing layout stretch
639
+ 8. Images & media: Distorted aspect ratio; improper cropping; blurry/pixelated; placeholder not replaced; video container letterboxing
640
+ 9. Text completeness: Words or numbers truncated mid-word due to insufficient container width; missing last characters without ellipsis.
641
+
642
+ [Decision & Output Rules]
643
+ - Base conclusions only on the current screenshot; if uncertain, state the most likely cause and an actionable fix
644
+ - If multiple layout issues exist in the same screenshot, merge them into a single object and list them in the 'issue' field separated by semicolons
645
+ - If no issues are found, output strictly None (no explanation)
646
+ """,
647
+ """Rigorously check each screenshot for missing key functional/content/navigation elements, loading failures, or display anomalies, and provide fix suggestions.
648
+
649
+ [Checklist]
650
+ 1. Functional elements: Buttons/links/inputs/dropdowns/pagination/search etc. missing or misplaced
651
+ 2. Content elements: Images/icons/headings/body text/lists/tables/placeholder copy missing
652
+ 3. Navigation elements: Top nav/sidebar/breadcrumb/back entry/navigation links missing
653
+ 4. Loading/error states: Broken images, 404, blank placeholders, skeleton not replaced, overly long loading, empty states lacking hints/guidance/actions
654
+ 5. Image display: Display anomalies, low-quality/blurry/pixelated, wrong cropping, aspect-ratio distortion, lazy-load failure
655
+ 6. Business-critical: Core CTAs missing/unusable; price/stock/status missing; required form fields missing; no submission feedback
656
+ 7. Interaction usability: Element visible but not clickable/disabled state incorrect; tappable/clickable area too small
657
+
658
+ [Decision & Output Rules]
659
+ - When unsure whether it's not rendered or late loading, still provide the best evidence-based judgment and suggestion
660
+ - If multiple missing/anomaly issues exist in the same screenshot, merge them into a single object and list them in the 'issue' field separated by semicolons
661
+ - If no issues are found, output strictly None (no explanation)
662
+ """
663
+ ]
664
+
665
+ OUTPUT_FORMAT = """
666
+ Output Requirements
667
+
668
+ **CRITICAL: You must choose ONE of the following two output formats based on your findings:**
669
+
670
+ **Format 1: NO ISSUES FOUND**
671
+ If you find no issues or problems, output exactly this JSON structure:
672
+ ```json
673
+ {
674
+ "status": "no_issues",
675
+ "message": "No issues detected"
676
+ }
677
+ ```
678
+
679
+ **Format 2: ISSUES FOUND**
680
+ If you find any issues, output a JSON array with the following structure:
681
+ ```json
682
+ [
683
+ { "summary": "Concise overall findings across screenshots" },
684
+ {
685
+ "screenshotid": <number>, # 0-based index of the input screenshot
686
+ "element": "<string>", # core element where the issue occurs (e.g., title, button, image, paragraph)
687
+ "issue": "<string>", # concise problem description stating the exact cause (if multiple issues exist for the same screenshot, summarize them here)
688
+ "coordinates": [x1, y1, x2, y2], # pixel coordinates on the screenshot. Origin at top-left; integers only; ensure 0 <= x1 <= x2 <= width-1 and 0 <= y1 <= y2 <= height-1. For text or single-line elements, y1 can equal y2.
689
+ "suggestion": "<string>", # suggestions / expected solutions (multiple points, separated by ";")
690
+ "confidence": "<high|medium|low>" # confidence level, values: *high* / *medium* / *low*
691
+ }
692
+ ]
693
+ ```
694
+
695
+ **⚠️ CRITICAL FORMAT RULES:**
696
+ - The FIRST object in the array MUST be the summary object: `{ "summary": "..." }`
697
+ - The summary object CANNOT contain any other fields besides "summary"
698
+ - All issue objects (with screenshotid, element, issue, coordinates, suggestion, confidence) MUST come AFTER the summary object
699
+ - NEVER put "summary" field inside issue objects
700
+
701
+ **Examples:**
702
+
703
+ **Example 1 - No Issues:**
704
+ ```json
705
+ {
706
+ "status": "no_issues",
707
+ "message": "No issues detected"
708
+ }
709
+ ```
710
+
711
+ **Example 2 - Issues Found (CORRECT FORMAT):**
712
+ ```json
713
+ [
714
+ { "summary": "Page issues: 1) navbar overlap; 2) grid spacing inconsistent" },
715
+ {
716
+ "screenshotid": 2,
717
+ "element": "Main Navigation Bar",
718
+ "issue": "Navigation items overlap with the logo, making the text unreadable",
719
+ "coordinates": [240, 122, 270, 122],
720
+ "suggestion": "Reduce logo width; add min-width to nav items; adjust flex-wrap",
721
+ "confidence": "medium"
722
+ },
723
+ {
724
+ "screenshotid": 3,
725
+ "element": "Product List Card",
726
+ "issue": "Excess vertical whitespace between cards prevents the first screen from displaying completely",
727
+ "coordinates": [80, 540, 920, 720],
728
+ "suggestion": "Normalize card min-height; unify grid gap; reduce top/bottom padding",
729
+ "confidence": "low"
730
+ }
731
+ ]
732
+ ```
733
+
734
+ **Important Rules:**
735
+ - NEVER output plain text without JSON structure
736
+ - If no issues are found, use Format 1 with "status": "no_issues"
737
+ - If issues are found, use Format 2 with the array structure
738
+ - **MANDATORY: Array structure must be [summary_object, issue_object1, issue_object2, ...]**
739
+ - **MANDATORY: Summary object must be FIRST and contain ONLY the "summary" field**
740
+ - **MANDATORY: Issue objects must NOT contain "summary" field**
741
+ - If multiple issues exist in the same screenshot, merge them into a single object
742
+ - Coordinates must be measured on the provided screenshot for the current viewport
743
+ - Keep descriptions concise and actionable
744
+ - Focus on business logic and user expectations
745
+ """
webqa_agent/static/assets/index.js ADDED
The diff for this file is too large to render. See raw diff
 
webqa_agent/static/assets/index_en-US.js ADDED
The diff for this file is too large to render. See raw diff
 
webqa_agent/static/assets/style.css ADDED
@@ -0,0 +1 @@
 
 
1
+ ._container_eghx4_1{display:flex;width:100vw;min-height:100vh;min-width:1280px;background-color:#fff}._container_eghx4_1 ._left_eghx4_8{width:200px;display:flex;justify-content:center;background-color:var(----light-bg)}._container_eghx4_1 ._left_eghx4_8 ._webTitle_eghx4_14{font-size:28px;font-weight:600;text-align:left;padding-left:26px;height:56px;line-height:56px;color:#000}._container_eghx4_1 ._right_eghx4_23{flex:1;width:0;background-color:#fff;padding:6px 24px 24px}._diy-scrollbar_12blq_1::-webkit-scrollbar{width:4px;height:4px}._diy-scrollbar_12blq_1::-webkit-scrollbar-track{background-color:transparent}._diy-scrollbar_12blq_1::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._diy-scrollbar_12blq_1:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._sContainer_12blq_17{display:flex;flex-direction:column;height:calc(100vh - 126px)}._summary_12blq_22{background-color:var(--warning-bg);padding:8px 12px 12px;font-size:14px;font-weight:400;border-radius:4px;max-height:180px;overflow-y:scroll;border-left:4px solid var(--warning-color)}._summary_12blq_22::-webkit-scrollbar{width:4px;height:4px}._summary_12blq_22::-webkit-scrollbar-track{background-color:transparent}._summary_12blq_22::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._summary_12blq_22:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._summary_12blq_22 ._title_12blq_48{color:var(--warning-color);display:block;font-size:28px;font-weight:600;margin-bottom:8px}._summary_12blq_22 ._yellowWords_12blq_55{font-weight:600;color:var(--warning-color)!important}._content_12blq_59{overflow-y:scroll;height:0px;flex:1;margin-top:32px;padding:0 24px 0 
0;display:grid;grid-template-columns:repeat(3,1fr);gap:24px;justify-items:center}._content_12blq_59::-webkit-scrollbar{width:4px;height:4px}._content_12blq_59::-webkit-scrollbar-track{background-color:transparent}._content_12blq_59::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._content_12blq_59:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._content_12blq_59 ._box_12blq_86{height:fit-content;width:-webkit-fill-available;border-radius:8px;background:var(--light-bg);padding:16px 24px;display:flex;flex-direction:column;gap:16px;justify-content:flex-start;position:relative;align-items:center}._content_12blq_59 ._box_12blq_86:hover{background-color:var(--hover-bg)}._content_12blq_59 ._box_12blq_86 ._label_12blq_102{font-size:32px;font-weight:600}._issuesBox_12blq_106{position:relative}._issuesBox_12blq_106 ._issuesCount_12blq_109{padding:0 4px;width:fit-content;height:16px;border-radius:50%;background-color:var(--failure-color);right:-16px;top:-8px;position:absolute;font-size:12px;font-weight:600;color:#fff;display:none}._issuesBox_12blq_106 ._show_12blq_123{display:flex;align-items:center;justify-content:center}._noResult_12blq_128{font-size:24px;font-weight:400;padding:24px;display:flex;flex-direction:column;align-items:center;justify-content:center;border-radius:8px;height:calc(100vh - 200px);background-color:var(--light-bg);color:var(--words-color)}._noResult_12blq_128 span{height:36px;line-height:36px;display:block;font-size:36px;font-weight:600;margin-bottom:12px}._noResult_12blq_128 ._noIcon_12blq_149{font-size:48px}._diy-scrollbar_xna4e_1::-webkit-scrollbar{width:4px;height:4px}._diy-scrollbar_xna4e_1::-webkit-scrollbar-track{background-color:transparent}._diy-scrollbar_xna4e_1::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._diy-scrollbar_xna4e_1:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid 
#ebecf0;background:#ebecf0}._reportContent_xna4e_17{height:fit-content;max-height:200px;overflow-y:scroll;border:1px solid var(--border-color);border-radius:4px;padding:0 16px 16px;margin-bottom:8px}._reportContent_xna4e_17::-webkit-scrollbar{width:4px;height:4px}._reportContent_xna4e_17::-webkit-scrollbar-track{background-color:transparent}._reportContent_xna4e_17::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._reportContent_xna4e_17:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._reportContent_xna4e_17 ._report_xna4e_17{color:#000;line-height:24px;display:block;font-size:18px;font-weight:600;margin-bottom:20px}._reportContent_xna4e_17 ._title_xna4e_50{margin-top:16px;line-height:24px;display:block;font-size:16px;font-weight:600;margin-bottom:4px;color:#000c}._reportContent_xna4e_17 ._title_xna4e_50 ._icon_xna4e_59{margin-left:8px;display:inline-block;padding:0 6px;border-radius:10px;font-size:12px;font-weight:500}._reportContent_xna4e_17 ._title_xna4e_50 ._success_xna4e_67{background-color:var(--success-color);color:var(--success-bg)}._reportContent_xna4e_17 ._title_xna4e_50 ._warning_xna4e_71{background-color:var(--warning-color);color:var(--warning-bg)}._reportContent_xna4e_17 ._title_xna4e_50 ._fail_xna4e_75{background-color:var(--failure-color);color:var(--failure-bg)}._reportContent_xna4e_17 ._has_issues_xna4e_79{line-height:20px;display:block;font-size:14px;color:#0009}._onlyReportContent_xna4e_85{height:100%!important;max-height:100%!important}._markdown_3p4mr_1{color:var(--words-color)}._markdown_3p4mr_1 table{overflow:hidden;border-radius:16px;border-spacing:0;empty-cells:show;border-collapse:separate;border-bottom:0px;margin-top:8px;margin-bottom:8px}._markdown_3p4mr_1 table thead{background-color:var(--white)!important;color:#121316cc;text-align:left}._markdown_3p4mr_1 table thead th{background-color:#ebecf0!important;vertical-align:middle}._markdown_3p4mr_1 table 
td,._markdown_3p4mr_1 table th{background-color:var(--white-7);padding:8px 24px;border-width:0 0 0 1px;font-size:inherit;border-bottom:1px solid #EBECF0;margin:0;overflow:visible}._markdown_3p4mr_1 table th{position:relative}._markdown_3p4mr_1 table tr:first-child th:after{top:35%;right:0;content:"";position:absolute;border-right:1px solid #d7d8dd;height:30%}._markdown_3p4mr_1 table tr:first-child th:last-child:after{top:10%;right:0;content:"";position:absolute;border-right:0px!important;height:80%}._markdown_3p4mr_1 pre{font-family:PingFang SC!important;margin-top:4px;margin-bottom:8px}._markdown_3p4mr_1 h1:first-child,._markdown_3p4mr_1 p:first-child,._markdown_3p4mr_1 h2:first-child,._markdown_3p4mr_1 h3:first-child,._markdown_3p4mr_1 h4:first-child,._markdown_3p4mr_1 h5:first-child,._markdown_3p4mr_1 h6:first-child,._markdown_3p4mr_1 ol:first-child{padding-top:0}._markdown_3p4mr_1 h1:last-child{padding-bottom:0}._markdown_3p4mr_1 p:last-child{padding-bottom:0}._markdown_3p4mr_1 h2:last-child{padding-bottom:0}._markdown_3p4mr_1 h3:last-child{padding-bottom:0}._markdown_3p4mr_1 h4:last-child{padding-bottom:0}._markdown_3p4mr_1 h5:last-child{padding-bottom:0}._markdown_3p4mr_1 h6:last-child{padding-bottom:0}._markdown_3p4mr_1 ol:last-child{padding-bottom:0}._markdown_3p4mr_1 p,._markdown_3p4mr_1 h1,._markdown_3p4mr_1 h2,._markdown_3p4mr_1 h3,._markdown_3p4mr_1 h4,._markdown_3p4mr_1 h5,._markdown_3p4mr_1 h6,._markdown_3p4mr_1 ol,._markdown_3p4mr_1 ul,._markdown_3p4mr_1 li{box-sizing:border-box;margin:0;padding-top:4px;padding-bottom:4px}._markdown_3p4mr_1 h1{font-size:20px;line-height:30px;padding-top:20px}._markdown_3p4mr_1 h2{font-size:18px;line-height:28px;padding-top:16px}._markdown_3p4mr_1 h3,._markdown_3p4mr_1 h4,._markdown_3p4mr_1 h5,._markdown_3p4mr_1 h6{font-size:16px;line-height:24px;padding-top:12px}._markdown_3p4mr_1 ul{padding-left:22px}._markdown_3p4mr_1 ul li{list-style-type:circle!important}._markdown_3p4mr_1 ul ol 
li{list-style-type:decimal!important}._markdown_3p4mr_1 ol{margin-left:36px;margin-inline-start:0px}._markdown_3p4mr_1 ol li{list-style-type:decimal!important}._markdown_3p4mr_1 ol ul li{list-style-type:circle!important}._markdown_3p4mr_1 a{text-decoration:none;color:var(--blue-6)}._markdown_3p4mr_1 hr{margin:24px 0;border:0px;border-bottom:1px solid var(--grey-4)}._markdown_3p4mr_1 code{font-family:SF Mono}._container_t5k03_1{display:flex;flex-direction:column;color:#000}._container_t5k03_1 ._sContainer_t5k03_6{display:flex;flex-direction:column}._subContainer_t5k03_10{display:flex;flex-direction:column;height:calc(100vh - 128px)}._noResult_t5k03_15{font-size:24px;font-weight:400;padding:24px;display:flex;flex-direction:column;align-items:center;justify-content:center;border-radius:8px;height:calc(100vh - 200px);background-color:var(--light-bg);color:var(--words-color)}._noResult_t5k03_15 span{height:36px;line-height:36px;display:block;font-size:36px;font-weight:600;margin-bottom:12px}._noResult_t5k03_15 ._noIcon_t5k03_36{font-size:48px}._badge_t5k03_39{margin-left:8px;display:inline-block;padding:2px 6px;border-radius:10px;font-size:12px;font-weight:500}._badgeSuccess_t5k03_47{background-color:var(--success-bg);color:var(--success-color)}._badgeFailure_t5k03_51{background-color:var(--failure-bg);color:var(--failure-color)}._badgeWarning_t5k03_55{background-color:var(--warning-bg);color:var(--warning-color)}._diy-scrollbar_1kd9e_1::-webkit-scrollbar{width:4px;height:4px}._diy-scrollbar_1kd9e_1::-webkit-scrollbar-track{background-color:transparent}._diy-scrollbar_1kd9e_1::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._diy-scrollbar_1kd9e_1:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._container_1kd9e_17{display:flex;gap:20px;height:calc(100% - 218px);flex:1}._container_1kd9e_17 ._badge_1kd9e_23{margin-left:8px;display:inline-block;padding:2px 
6px;border-radius:10px;font-size:12px;font-weight:500}._container_1kd9e_17 ._badgeSuccess_1kd9e_31{background-color:var(--success-bg);color:var(--success-color)}._container_1kd9e_17 ._badgeFailure_1kd9e_35{background-color:var(--failure-bg);color:var(--failure-color)}._container_1kd9e_17 ._badgeWarning_1kd9e_39{background-color:var(--warning-bg);color:var(--warning-color)}._container_1kd9e_17 ._screenshots_1kd9e_43{overflow-y:scroll;width:600px;flex:1}._container_1kd9e_17 ._screenshots_1kd9e_43::-webkit-scrollbar{width:4px;height:4px}._container_1kd9e_17 ._screenshots_1kd9e_43::-webkit-scrollbar-track{background-color:transparent}._container_1kd9e_17 ._screenshots_1kd9e_43::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._container_1kd9e_17 ._screenshots_1kd9e_43:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._container_1kd9e_17 ._screenshots_1kd9e_43 ._screenshotWrapper_1kd9e_64{margin-bottom:20px;border:1px solid var(--border-color);border-radius:4px;overflow:hidden;scroll-margin-top:20px}._container_1kd9e_17 ._screenshots_1kd9e_43 ._screenshotTitle_1kd9e_71{padding:10px;background-color:var(--light-bg);border-bottom:1px solid var(--border-color);font-weight:500}._container_1kd9e_17 ._screenshots_1kd9e_43 ._screenshot_1kd9e_43{max-width:100%;height:auto;display:block}._container_1kd9e_17 ._left_1kd9e_82{width:250px;min-width:200px;max-width:400px;border:1px solid var(--border-color);overflow-y:auto;padding:10px;border-radius:4px;flex-shrink:0}._container_1kd9e_17 ._left_1kd9e_82::-webkit-scrollbar{width:4px;height:4px}._container_1kd9e_17 ._left_1kd9e_82::-webkit-scrollbar-track{background-color:transparent}._container_1kd9e_17 ._left_1kd9e_82::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._container_1kd9e_17 ._left_1kd9e_82:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._container_1kd9e_17 
._left_1kd9e_82 ._stepItem_1kd9e_108{padding:12px 15px;background-color:#fff;border:1px solid var(--border-color);border-radius:4px;margin-bottom:10px;cursor:pointer;transition:all .2s}._container_1kd9e_17 ._left_1kd9e_82 ._stepItem_1kd9e_108:hover{background-color:var(--hover-bg)}._container_1kd9e_17 ._left_1kd9e_82 ._stepItem_1kd9e_108 ._stepNumber_1kd9e_120{font-weight:600;margin-bottom:5px;display:flex;justify-content:space-between;align-items:center}._container_1kd9e_17 ._left_1kd9e_82 ._stepItem_1kd9e_108 ._stepDescription_1kd9e_127{font-size:14px;color:var(--words-color)}._container_1kd9e_17 ._left_1kd9e_82 ._stepItem_1kd9e_108 ._stepTime_1kd9e_131{font-size:12px;color:var(--secondary-color);margin-top:5px}._container_1kd9e_17 ._left_1kd9e_82 ._activeItem_1kd9e_136{border-color:var(--active-color)}._container_1kd9e_17 ._left_1kd9e_82 ._activeItem_1kd9e_136 ._stepNumber_1kd9e_120{color:var(--active-color)!important}._container_1kd9e_17 ._right_1kd9e_142{width:300px;border:1px solid var(--border-color);padding:12px;border-radius:4px;overflow-y:scroll}._container_1kd9e_17 ._right_1kd9e_142::-webkit-scrollbar{width:4px;height:4px}._container_1kd9e_17 ._right_1kd9e_142::-webkit-scrollbar-track{background-color:transparent}._container_1kd9e_17 ._right_1kd9e_142::-webkit-scrollbar-thumb{border-radius:10px;background:#ebecf0}._container_1kd9e_17 ._right_1kd9e_142:hover::-webkit-scrollbar-thumb{border-right:1px solid #ebecf0;border-bottom:1px solid #ebecf0;background:#ebecf0}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165{margin-bottom:30px}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionCard_1kd9e_168{cursor:pointer;transition:background-color .2s}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionCard_1kd9e_168:hover{background-color:var(--hover-bg)}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 
._success_1kd9e_175{color:var(--success-color);border-color:var(--success-color)}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._failure_1kd9e_179{color:var(--failure-color);border-color:var(--failure-color)}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionHeader_1kd9e_183{padding:10px 15px;display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid var(--border-color)}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionTitle_1kd9e_190{font-weight:500}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionResult_1kd9e_193{font-size:13px;font-weight:500}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionResultSuccess_1kd9e_197{color:var(--success-color)}._container_1kd9e_17 ._right_1kd9e_142 ._actionsList_1kd9e_165 ._actionResultFailure_1kd9e_200{color:var(--failure-color)}._container_1kd9e_17 ._right_1kd9e_142 ._modelIOTitle_1kd9e_203{font-size:16px;font-weight:600;margin-bottom:4px}._container_1tejq_1{display:flex;flex-direction:column}._container_1tejq_1 ._name_1tejq_5{color:#000;font-size:18px;font-weight:600;line-height:60px;height:60px}
webqa_agent/static/i18n/en-US.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aggregator": {
3
+ "assessment_overview": "Overview",
4
+ "summary_and_advice": "Summary & Advice",
5
+ "issue_list": "Issue List",
6
+ "issue_tracker_list": "Issue Tracker List",
7
+ "issue_list_note": "Note: This list summarizes all detected \"failed\" and \"warning\" items",
8
+ "assessment_categories": "Test Cases",
9
+ "passed_count": "Passed",
10
+ "failed_count": "Failed",
11
+ "test_failed_prefix": "Test Failed: ",
12
+ "execution_error_prefix": "Execution Error: ",
13
+ "llm_prompt_main": "You are an experienced software testing analyst. Please read the following sub-test information and extract [Issue Content], [Issue Count], and [Severity]:\n1) If status = pass, return JSON {\"issue_count\": 0}.\n2) If status != pass, judge based on the specific content of report, metrics, or final_summary:\n - Extract the most critical one-sentence issue description as issues\n - Count issue quantity as issue_count (if unable to count accurately, default to 1)\n - Severity assessment: First check if severity is already marked in the report (like high/medium/low, critical/major/minor, etc.), if so, follow directly; if not clearly marked in report, judge based on issue impact: high (serious impact on functionality/performance), medium (moderate impact), low (minor issues/warnings)\n3) You cannot output any other content or code blocks, only output unified JSON: {\"issue_count\": <number>, \"issues\": \"one-sentence English issue description\", \"severity\": \"high|medium|low\"}.",
14
+ "llm_prompt_test_info": "Sub-test information: "
15
+ },
16
+ "testers": {
17
+ "basic": {
18
+ "basic_test_display": "Basic Function Test - ",
19
+ "accessibility_check": "Accessibility Check",
20
+ "main_link_check": "Main Link Check",
21
+ "sub_link_check": "Sub Link Check",
22
+ "test_results": "Test Results",
23
+ "clickable_element_check": "Clickable Element Traversal Check",
24
+ "click_element": "Click Element",
25
+ "traversal_test_results": "Traversal Test Results",
26
+ "clickable_elements_count": "Clickable elements",
27
+ "click_failed_count": ", click actions failed"
28
+ },
29
+ "performance": {
30
+ "core_metrics": "Core Web Vitals",
31
+ "improve": "Improve",
32
+ "current_value": "Current value",
33
+ "target": "Target",
34
+ "performance_optimization": "Performance Optimization",
35
+ "potential_savings": "Potential savings",
36
+ "resource_optimization": "Resource Optimization",
37
+ "reduce_total_size": "Reduce total page size",
38
+ "current": "Current",
39
+ "optimize_third_party": "Optimize third-party resource usage",
40
+ "performance_diagnosis": "Performance Diagnosis",
41
+ "seo": "SEO",
42
+ "images": "images",
43
+ "links": "links",
44
+ "example": "e.g.",
45
+ "overall_score": "Overall Score",
46
+ "issues_to_improve": "Issues to Improve",
47
+ "performance_metrics": "Performance Metrics"
48
+ },
49
+ "ux": {
50
+ "ux_test_display": "UX Test - ",
51
+ "text_check_name": "Text Check",
52
+ "page_blank_error": "Page is blank, no visible content",
53
+ "no_issues_found": "No issues found",
54
+ "report_title": "Text Check",
55
+ "overall_problem": "**Overall Problem:** ",
56
+ "issue_details": "### {}. Issue Details\n\n",
57
+ "location": "**Location:** ",
58
+ "error_content": "**Current Error:** ",
59
+ "suggested_fix": "**Suggested Fix:** ",
60
+ "error_type": "**Error Type:** ",
61
+ "unknown_location": "Unknown Location",
62
+ "unknown_type": "Unknown Type",
63
+ "layout_check_name": "Web Content Check",
64
+ "element_check_name": "Web Element Check",
65
+ "report_title_content": "Content Check",
66
+ "layout_case": "Layout Check",
67
+ "image_case": "Element Check",
68
+ "text_case": "Text Typography"
69
+ },
70
+ "security": {
71
+ "cve_scan": "Known CVE Vulnerability Scan",
72
+ "xss_scan": "Cross-site Scripting Detection",
73
+ "sqli_scan": "SQL Injection Detection",
74
+ "rce_scan": "Remote Code Execution Detection",
75
+ "lfi_scan": "Local File Inclusion Detection",
76
+ "ssrf_scan": "Server-side Request Forgery Detection",
77
+ "redirect_scan": "Open Redirect Detection",
78
+ "exposure_scan": "Sensitive Information Exposure Detection",
79
+ "config_scan": "Configuration Error Detection",
80
+ "default_login_scan": "Default Credentials Detection",
81
+ "ssl_scan": "SSL/TLS Configuration Detection",
82
+ "dns_scan": "DNS Related Detection",
83
+ "subdomain_takeover_scan": "Subdomain Takeover Detection",
84
+ "tech_scan": "Technology Stack Identification",
85
+ "panel_scan": "Admin Panel Detection",
86
+ "http_protocol": "HTTP Protocol Scan",
87
+ "dns_protocol": "DNS Protocol Scan",
88
+ "tcp_protocol": "TCP Protocol Scan",
89
+ "ssl_protocol": "SSL Protocol Scan",
90
+ "critical_vulnerability": "Critical Vulnerability Scan",
91
+ "high_risk_vulnerability": "High Risk Vulnerability Scan",
92
+ "medium_risk_vulnerability": "Medium Risk Vulnerability Scan",
93
+ "no_security_issues": "No related security issues found",
94
+ "found_issues": "Found {count} issues",
95
+ "including": "including",
96
+ "and_more": "and more",
97
+ "security_check": "Security Check",
98
+ "no_issues_found": "No issues found",
99
+ "nuclei_check": "Nuclei Check",
100
+ "nuclei_not_found": "Nuclei tool not found. Please install nuclei: go install -v github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest",
101
+ "no_severity_issues": "No {severity} level security issues found",
102
+ "found_severity_issues": "Found {count} {severity} level security issues",
103
+ "severity_level_scan": "{severity} Level Security Issues Scan",
104
+ "severity_level_vulnerability": "{severity} Level Security Vulnerability Scan",
105
+ "matched_at": "Matched at",
106
+ "extracted": "Extracted",
107
+ "no_details": "No further details"
108
+ },
109
+ "ai_function": {
110
+ "intelligent_function_test": "Intelligent Function Test"
111
+ }
112
+ },
113
+ "common": {
114
+ "level": "level",
115
+ "issues": "issues"
116
+ },
117
+ "display": {
118
+ "completed_tasks": "🎉 Completed Tasks",
119
+ "running_tasks": "🚀 Running Tasks",
120
+ "total_time": "⏱️ Total Time",
121
+ "error_tasks": "⚠️ Error Tasks:",
122
+ "error_message": "Error Message:",
123
+ "task_execution_summary": "📊 Task Execution Summary",
124
+ "no_issues_found": "No issues found"
125
+ }
126
+ }
127
+